git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Git archive doesn't fully support zip64
@ 2017-04-21 21:08 Keith Goldfarb
  2017-04-22 19:22 ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: Keith Goldfarb @ 2017-04-21 21:08 UTC (permalink / raw)
  To: git

Dear git,

git archive, when writing a zip file, has a silent 4GB file size limit (on the inputs as well as the output), as it doesn’t fully support zip64.

Although a zip archive written by git which is larger than 4GB can be often still be unzipped, it won’t be fully useable and some tools (e.g. zipdetails) can’t read it at all.

I suggest either git should be changed to fully support zip64 or it should give a proper error when either an input or the output is too large.

Thanks,

K.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-21 21:08 Git archive doesn't fully support zip64 Keith Goldfarb
@ 2017-04-22 19:22 ` Peter Krefting
  2017-04-22 21:52   ` Johannes Sixt
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-22 19:22 UTC (permalink / raw)
  To: git; +Cc: Keith Goldfarb

If the size of the files in the archive cannot be expressed in 32 bits, or
the offset in the zip file itself, add zip64 local headers with the actual
size. If we do find such entries, we also set a flag to force the creation
of a zip64 end of central directory record.

Signed-off-by: Peter Krefting <peter@softwolves.pp.se>
---
  archive-zip.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
  1 file changed, 41 insertions(+), 9 deletions(-)

> git archive, when writing a zip file, has a silent 4GB file size 
> limit (on the inputs as well as the output), as it doesn’t fully 
> support zip64.

Yeah, it seems that the zip64 support that was added was to support 
more than 65535 files, but it did not add support for 64-bit file 
sizes or ZIP archives.

Try the below patch, it seems to work for me with a repository with 
two files of 4 Gbyte plus a few bytes. Haven't tested the case where 
the archive itself is larger than 4 Gbyte, but that ought to work, too.

diff --git a/archive-zip.c b/archive-zip.c
index b429a8d..c76a9b4 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,6 +10,7 @@

  static int zip_date;
  static int zip_time;
+static int zip_zip64;

  static unsigned char *zip_dir;
  static unsigned int zip_dir_size;
@@ -88,6 +89,16 @@ struct zip_extra_mtime {
  	unsigned char _end[1];
  };

+struct zip_extra_zip64 {
+	unsigned char magic[2];
+	unsigned char extra_size[2];
+	unsigned char size[8];
+	unsigned char compressed_size[8];
+	unsigned char offset[8];
+	unsigned char disk[4];
+	unsigned char _end[1];
+};
+
  struct zip64_dir_trailer {
  	unsigned char magic[4];
  	unsigned char record_size[8];
@@ -122,6 +133,9 @@ struct zip64_dir_trailer_locator {
  #define ZIP_EXTRA_MTIME_SIZE	offsetof(struct zip_extra_mtime, _end)
  #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \
  	(ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags))
+#define ZIP_EXTRA_ZIP64_SIZE	offsetof(struct zip_extra_zip64, _end)
+#define ZIP_EXTRA_ZIP64_PAYLOAD_SIZE \
+	(ZIP_EXTRA_ZIP64_SIZE - offsetof(struct zip_extra_zip64, size))
  #define ZIP64_DIR_TRAILER_SIZE	offsetof(struct zip64_dir_trailer, _end)
  #define ZIP64_DIR_TRAILER_RECORD_SIZE \
  	(ZIP64_DIR_TRAILER_SIZE - \
@@ -219,19 +233,25 @@ static void set_zip_dir_data_desc(struct zip_dir_header *header,
  				  unsigned long compressed_size,
  				  unsigned long crc)
  {
+	int clamped = 0;
  	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, &clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, &clamped));
+	if (clamped)
+		zip_zip64 = 1;
  }

  static void set_zip_header_data_desc(struct zip_local_header *header,
  				     unsigned long size,
  				     unsigned long compressed_size,
-				     unsigned long crc)
+				     unsigned long crc,
+				     int *clamped)
  {
  	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, clamped));
+	if (clamped)
+		zip_zip64 = 1;
  }

  static int has_only_ascii(const char *s)
@@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args *args,
  	int is_binary = -1;
  	const char *path_without_prefix = path + args->baselen;
  	unsigned int creator_version = 0;
+	int clamped = 0;

  	crc = crc32(0, NULL, 0);

@@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
  	copy_le16(dirent.comment_length, 0);
  	copy_le16(dirent.disk, 0);
  	copy_le32(dirent.attr2, attr2);
-	copy_le32(dirent.offset, zip_offset);
+	copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU, &clamped));

  	copy_le32(header.magic, 0x04034b50);
  	copy_le16(header.version, 10);
@@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args *args,
  	copy_le16(header.compression_method, method);
  	copy_le16(header.mtime, zip_time);
  	copy_le16(header.mdate, zip_date);
-	set_zip_header_data_desc(&header, size, compressed_size, crc);
+	set_zip_header_data_desc(&header, size, compressed_size, crc, &clamped);
  	copy_le16(header.filename_length, pathlen);
-	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
+	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ? ZIP_EXTRA_ZIP64_SIZE : 0));
  	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
  	zip_offset += ZIP_LOCAL_HEADER_SIZE;
  	write_or_die(1, path, pathlen);
  	zip_offset += pathlen;
  	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
  	zip_offset += ZIP_EXTRA_MTIME_SIZE;
+	if (clamped) {
+		struct zip_extra_zip64 extra_zip64;
+		copy_le16(extra_zip64.magic, 0x0001);
+		copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
+		copy_le64(extra_zip64.size, size);
+		copy_le64(extra_zip64.compressed_size, compressed_size);
+		copy_le64(extra_zip64.offset, zip_offset);
+		copy_le32(extra_zip64.disk, 0);
+		write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
+		zip_offset += ZIP_EXTRA_ZIP64_SIZE;
+	}
  	if (stream && method == 0) {
  		unsigned char buf[STREAM_BUFFER_SIZE];
  		ssize_t readlen;
@@ -538,7 +570,7 @@ static void write_zip_trailer(const unsigned char *sha1)
  	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);

  	write_or_die(1, zip_dir, zip_dir_offset);
-	if (clamped)
+	if (clamped || zip_zip64)
  		write_zip64_trailer();
  	write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
  	if (sha1)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-22 19:22 ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits Peter Krefting
@ 2017-04-22 21:52   ` Johannes Sixt
  2017-04-22 22:41     ` [PATCH v2] " Peter Krefting
  2017-04-23  0:16     ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits René Scharfe
  0 siblings, 2 replies; 44+ messages in thread
From: Johannes Sixt @ 2017-04-22 21:52 UTC (permalink / raw)
  To: Peter Krefting; +Cc: git, Keith Goldfarb

Am 22.04.2017 um 21:22 schrieb Peter Krefting:
> @@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args *args,
>      int is_binary = -1;
>      const char *path_without_prefix = path + args->baselen;
>      unsigned int creator_version = 0;
> +    int clamped = 0;
>
>      crc = crc32(0, NULL, 0);
>
> @@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
>      copy_le16(dirent.comment_length, 0);
>      copy_le16(dirent.disk, 0);
>      copy_le32(dirent.attr2, attr2);
> -    copy_le32(dirent.offset, zip_offset);
> +    copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU,
> &clamped));
>
>      copy_le32(header.magic, 0x04034b50);
>      copy_le16(header.version, 10);
> @@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args
> *args,
>      copy_le16(header.compression_method, method);
>      copy_le16(header.mtime, zip_time);
>      copy_le16(header.mdate, zip_date);
> -    set_zip_header_data_desc(&header, size, compressed_size, crc);
> +    set_zip_header_data_desc(&header, size, compressed_size, crc,
> &clamped);
>      copy_le16(header.filename_length, pathlen);
> -    copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
> +    copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ?
> ZIP_EXTRA_ZIP64_SIZE : 0));
>      write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
>      zip_offset += ZIP_LOCAL_HEADER_SIZE;
>      write_or_die(1, path, pathlen);
>      zip_offset += pathlen;
>      write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
>      zip_offset += ZIP_EXTRA_MTIME_SIZE;
> +    if (clamped) {
> +        struct zip_extra_zip64 extra_zip64;
> +        copy_le16(extra_zip64.magic, 0x0001);
> +        copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
> +        copy_le64(extra_zip64.size, size);
> +        copy_le64(extra_zip64.compressed_size, compressed_size);
> +        copy_le64(extra_zip64.offset, zip_offset);
> +        copy_le32(extra_zip64.disk, 0);
> +        write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
> +        zip_offset += ZIP_EXTRA_ZIP64_SIZE;

Is this correct? Not all of the zip64 extra fields are always populated. 
Only those whose regular fields are filled with 0xffffffff must be 
present. Since there is only one flag, it is not possible to know which 
of the fields must be filled in.

Readers will most likely ignore trailing fields that should not be 
there; however, when the offset exceeds 32 bits, but not the compressed 
size, readers will pick the compressed size and interpret it as offset.

> +    }

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-22 21:52   ` Johannes Sixt
@ 2017-04-22 22:41     ` Peter Krefting
  2017-04-23  7:50       ` Johannes Sixt
  2017-04-23  0:16     ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits René Scharfe
  1 sibling, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-22 22:41 UTC (permalink / raw)
  To: git; +Cc: Johannes Sixt, Keith Goldfarb

If the size of the files in the archive cannot be expressed in 32 bits, or
the offset in the zip file itself, add zip64 local headers with the actual
size. If we do find such entries, we also set a flag to force the creation
of a zip64 end of central directory record.

Signed-off-by: Peter Krefting <peter@softwolves.pp.se>
---
 archive-zip.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

> Is this correct? Not all of the zip64 extra fields are always populated. 
> Only those whose regular fields are filled with 0xffffffff must be 
> present.

Indeed. Last time I implemented zip64 support it was on the reading side,
and I remember this was a mess...

diff --git a/archive-zip.c b/archive-zip.c
index b429a8d97..50c7ab005 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,6 +10,7 @@
 
 static int zip_date;
 static int zip_time;
+static int zip_zip64;
 
 static unsigned char *zip_dir;
 static unsigned int zip_dir_size;
@@ -88,6 +89,16 @@ struct zip_extra_mtime {
 	unsigned char _end[1];
 };
 
+struct zip_extra_zip64 {
+	unsigned char magic[2];
+	unsigned char extra_size[2];
+	unsigned char size[8];
+	unsigned char compressed_size[8];
+	unsigned char offset[8];
+	unsigned char disk[4];
+	unsigned char _end[1];
+};
+
 struct zip64_dir_trailer {
 	unsigned char magic[4];
 	unsigned char record_size[8];
@@ -122,6 +133,9 @@ struct zip64_dir_trailer_locator {
 #define ZIP_EXTRA_MTIME_SIZE	offsetof(struct zip_extra_mtime, _end)
 #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \
 	(ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags))
+#define ZIP_EXTRA_ZIP64_SIZE	offsetof(struct zip_extra_zip64, _end)
+#define ZIP_EXTRA_ZIP64_PAYLOAD_SIZE \
+	(ZIP_EXTRA_ZIP64_SIZE - offsetof(struct zip_extra_zip64, size))
 #define ZIP64_DIR_TRAILER_SIZE	offsetof(struct zip64_dir_trailer, _end)
 #define ZIP64_DIR_TRAILER_RECORD_SIZE \
 	(ZIP64_DIR_TRAILER_SIZE - \
@@ -219,19 +233,25 @@ static void set_zip_dir_data_desc(struct zip_dir_header *header,
 				  unsigned long compressed_size,
 				  unsigned long crc)
 {
+	int clamped = 0;
 	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, &clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, &clamped));
+	if (clamped)
+		zip_zip64 = 1;
 }
 
 static void set_zip_header_data_desc(struct zip_local_header *header,
 				     unsigned long size,
 				     unsigned long compressed_size,
-				     unsigned long crc)
+				     unsigned long crc,
+				     int *clamped)
 {
 	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, clamped));
+	if (clamped)
+		zip_zip64 = 1;
 }
 
 static int has_only_ascii(const char *s)
@@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args *args,
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
 	unsigned int creator_version = 0;
+	int clamped = 0;
 
 	crc = crc32(0, NULL, 0);
 
@@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
 	copy_le16(dirent.comment_length, 0);
 	copy_le16(dirent.disk, 0);
 	copy_le32(dirent.attr2, attr2);
-	copy_le32(dirent.offset, zip_offset);
+	copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU, &clamped));
 
 	copy_le32(header.magic, 0x04034b50);
 	copy_le16(header.version, 10);
@@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args *args,
 	copy_le16(header.compression_method, method);
 	copy_le16(header.mtime, zip_time);
 	copy_le16(header.mdate, zip_date);
-	set_zip_header_data_desc(&header, size, compressed_size, crc);
+	set_zip_header_data_desc(&header, size, compressed_size, crc, &clamped);
 	copy_le16(header.filename_length, pathlen);
-	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
+	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ? ZIP_EXTRA_ZIP64_SIZE : 0));
 	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
 	zip_offset += ZIP_LOCAL_HEADER_SIZE;
 	write_or_die(1, path, pathlen);
 	zip_offset += pathlen;
 	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
 	zip_offset += ZIP_EXTRA_MTIME_SIZE;
+	if (clamped) {
+		struct zip_extra_zip64 extra_zip64;
+		copy_le16(extra_zip64.magic, 0x0001);
+		copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
+		copy_le64(extra_zip64.size, size >= 0xFFFFFFFFU ? size : 0);
+		copy_le64(extra_zip64.compressed_size, compressed_size >= 0xFFFFFFFFU ? compressed_size : 0);
+		copy_le64(extra_zip64.offset, zip_offset >= 0xFFFFFFFFU ? zip_offset : 0);
+		copy_le32(extra_zip64.disk, 0);
+		write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
+		zip_offset += ZIP_EXTRA_ZIP64_SIZE;
+	}
 	if (stream && method == 0) {
 		unsigned char buf[STREAM_BUFFER_SIZE];
 		ssize_t readlen;
@@ -538,7 +570,7 @@ static void write_zip_trailer(const unsigned char *sha1)
 	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);
 
 	write_or_die(1, zip_dir, zip_dir_offset);
-	if (clamped)
+	if (clamped || zip_zip64)
 		write_zip64_trailer();
 	write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
 	if (sha1)
-- 
2.12.2

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-22 21:52   ` Johannes Sixt
  2017-04-22 22:41     ` [PATCH v2] " Peter Krefting
@ 2017-04-23  0:16     ` René Scharfe
  2017-04-23  6:42       ` Peter Krefting
  1 sibling, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-23  0:16 UTC (permalink / raw)
  To: Johannes Sixt, Peter Krefting; +Cc: git, Keith Goldfarb

Am 22.04.2017 um 23:52 schrieb Johannes Sixt:
> Am 22.04.2017 um 21:22 schrieb Peter Krefting:
>> @@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args 
>> *args,
>>      int is_binary = -1;
>>      const char *path_without_prefix = path + args->baselen;
>>      unsigned int creator_version = 0;
>> +    int clamped = 0;
>>
>>      crc = crc32(0, NULL, 0);
>>
>> @@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args 
>> *args,
>>      copy_le16(dirent.comment_length, 0);
>>      copy_le16(dirent.disk, 0);
>>      copy_le32(dirent.attr2, attr2);
>> -    copy_le32(dirent.offset, zip_offset);
>> +    copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU,
>> &clamped));
>>
>>      copy_le32(header.magic, 0x04034b50);
>>      copy_le16(header.version, 10);
>> @@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args
>> *args,
>>      copy_le16(header.compression_method, method);
>>      copy_le16(header.mtime, zip_time);
>>      copy_le16(header.mdate, zip_date);
>> -    set_zip_header_data_desc(&header, size, compressed_size, crc);
>> +    set_zip_header_data_desc(&header, size, compressed_size, crc,
>> &clamped);
>>      copy_le16(header.filename_length, pathlen);
>> -    copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
>> +    copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ?
>> ZIP_EXTRA_ZIP64_SIZE : 0));
>>      write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
>>      zip_offset += ZIP_LOCAL_HEADER_SIZE;
>>      write_or_die(1, path, pathlen);
>>      zip_offset += pathlen;
>>      write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
>>      zip_offset += ZIP_EXTRA_MTIME_SIZE;
>> +    if (clamped) {
>> +        struct zip_extra_zip64 extra_zip64;
>> +        copy_le16(extra_zip64.magic, 0x0001);
>> +        copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
>> +        copy_le64(extra_zip64.size, size);
>> +        copy_le64(extra_zip64.compressed_size, compressed_size);
>> +        copy_le64(extra_zip64.offset, zip_offset);
>> +        copy_le32(extra_zip64.disk, 0);
>> +        write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
>> +        zip_offset += ZIP_EXTRA_ZIP64_SIZE;
> 
> Is this correct? Not all of the zip64 extra fields are always populated. 
> Only those whose regular fields are filled with 0xffffffff must be 
> present. Since there is only one flag, it is not possible to know which 
> of the fields must be filled in.
> 
> Readers will most likely ignore trailing fields that should not be 
> there; however, when the offset exceeds 32 bits, but not the compressed 
> size, readers will pick the compressed size and interpret it as offset.

The offset is declared as unsigned int, so will wrap on most platforms
before reaching the clamp check.  At least InfoZIP's unzip can handle
that, but it's untidy.

The offset is only needed in the ZIP64 extra record for the central
header (in zip_dir) -- the local header has no offset field.  That said,
I haven't been able to implement proper 64 bit offset support so far.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-23  0:16     ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits René Scharfe
@ 2017-04-23  6:42       ` Peter Krefting
  2017-04-23  7:27         ` Johannes Sixt
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-23  6:42 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

> The offset is declared as unsigned int, so will wrap on most platforms
> before reaching the clamp check.  At least InfoZIP's unzip can handle
> that, but it's untidy.

Right, that needs to be changed into unsigned long and clamped, just 
like the original and compressed file sizes already are.

> The offset is only needed in the ZIP64 extra record for the central 
> header (in zip_dir) -- the local header has no offset field.

The zip64 local header does have an offset field, though. I thought 
that was the zip_offset value, but that doesn't make sense, I'm not 
quite sure what it is supposed to store. I need to investigate that 
further, I assume.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-23  6:42       ` Peter Krefting
@ 2017-04-23  7:27         ` Johannes Sixt
  0 siblings, 0 replies; 44+ messages in thread
From: Johannes Sixt @ 2017-04-23  7:27 UTC (permalink / raw)
  To: Peter Krefting, René Scharfe; +Cc: git, Keith Goldfarb

Am 23.04.2017 um 08:42 schrieb Peter Krefting:
> René Scharfe:
>> The offset is only needed in the ZIP64 extra record for the central
>> header (in zip_dir) -- the local header has no offset field.

Good point.

> The zip64 local header does have an offset field, though. I thought that
> was the zip_offset value, but that doesn't make sense, I'm not quite
> sure what it is supposed to store. I need to investigate that further, I
> assume.

Let's get the naming straight: There is no "zip64 local header". There 
is a "zip64 extra record" for the "zip local header". The zip64 extra 
data record has an offset field, but since the local header does not 
have an offset field, the offset field in the corresponding zip64 extra 
data record is always omitted.

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-22 22:41     ` [PATCH v2] " Peter Krefting
@ 2017-04-23  7:50       ` Johannes Sixt
  2017-04-23 14:51         ` Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: Johannes Sixt @ 2017-04-23  7:50 UTC (permalink / raw)
  To: Peter Krefting, git; +Cc: Keith Goldfarb

Am 23.04.2017 um 00:41 schrieb Peter Krefting:
> Indeed. Last time I implemented zip64 support it was on the reading side,
> and I remember this was a mess...

It is indeed!

>  static void set_zip_header_data_desc(struct zip_local_header *header,
>  				     unsigned long size,
>  				     unsigned long compressed_size,
> -				     unsigned long crc)
> +				     unsigned long crc,
> +				     int *clamped)
>  {
>  	copy_le32(header->crc32, crc);
> -	copy_le32(header->compressed_size, compressed_size);
> -	copy_le32(header->size, size);
> +	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, clamped));
> +	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, clamped));
> +	if (clamped)
> +		zip_zip64 = 1;

This must be

	if (*clamped)

>  }
>
>  static int has_only_ascii(const char *s)
> @@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args *args,
>  	int is_binary = -1;
>  	const char *path_without_prefix = path + args->baselen;
>  	unsigned int creator_version = 0;
> +	int clamped = 0;
>
>  	crc = crc32(0, NULL, 0);
>
> @@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
>  	copy_le16(dirent.comment_length, 0);
>  	copy_le16(dirent.disk, 0);
>  	copy_le32(dirent.attr2, attr2);
> -	copy_le32(dirent.offset, zip_offset);
> +	copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU, &clamped));

I don't see any provisions to write the zip64 extra header in the 
central directory when this offset is clamped. This means that ZIP 
archives whose size exceed 4GB are still unsupported.

>
>  	copy_le32(header.magic, 0x04034b50);
>  	copy_le16(header.version, 10);
> @@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args *args,
>  	copy_le16(header.compression_method, method);
>  	copy_le16(header.mtime, zip_time);
>  	copy_le16(header.mdate, zip_date);
> -	set_zip_header_data_desc(&header, size, compressed_size, crc);
> +	set_zip_header_data_desc(&header, size, compressed_size, crc, &clamped);
>  	copy_le16(header.filename_length, pathlen);
> -	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
> +	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ? ZIP_EXTRA_ZIP64_SIZE : 0));
>  	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
>  	zip_offset += ZIP_LOCAL_HEADER_SIZE;
>  	write_or_die(1, path, pathlen);
>  	zip_offset += pathlen;
>  	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
>  	zip_offset += ZIP_EXTRA_MTIME_SIZE;
> +	if (clamped) {
> +		struct zip_extra_zip64 extra_zip64;
> +		copy_le16(extra_zip64.magic, 0x0001);
> +		copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
> +		copy_le64(extra_zip64.size, size >= 0xFFFFFFFFU ? size : 0);
> +		copy_le64(extra_zip64.compressed_size, compressed_size >= 0xFFFFFFFFU ? compressed_size : 0);
> +		copy_le64(extra_zip64.offset, zip_offset >= 0xFFFFFFFFU ? zip_offset : 0);
> +		copy_le32(extra_zip64.disk, 0);

These are wrong, I think. Entries that did not overflow must be omitted 
entirely from the zip64 extra record, not filled with 0. This implies 
that the payload size (.extra_size) is dynamic.

As René pointed out, the offset is only written in the central 
directory, but not in the local header for the current file. Therefore, 
it must be omitted here. The disk number also never exceeds 0xffff and 
must be omitted as well.

> +		write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
> +		zip_offset += ZIP_EXTRA_ZIP64_SIZE;
> +	}
>  	if (stream && method == 0) {
>  		unsigned char buf[STREAM_BUFFER_SIZE];
>  		ssize_t readlen;
> @@ -538,7 +570,7 @@ static void write_zip_trailer(const unsigned char *sha1)
>  	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);
>
>  	write_or_die(1, zip_dir, zip_dir_offset);
> -	if (clamped)
> +	if (clamped || zip_zip64)
>  		write_zip64_trailer();
>  	write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
>  	if (sha1)
>

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-23  7:50       ` Johannes Sixt
@ 2017-04-23 14:51         ` Peter Krefting
  2017-04-23 19:49           ` Johannes Sixt
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-23 14:51 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: René Scharfe, git, Keith Goldfarb

Johannes Sixt:

> Let's get the naming straight: There is no "zip64 local header". There is a 
> "zip64 extra record" for the "zip local header".

Indeed, sorry for the confusion. That's what I get for trying to write 
coherent email at half past midnight :-)

> The zip64 extra data record has an offset field, but since the local 
> header does not have an offset field, the offset field in the 
> corresponding zip64 extra data record is always omitted.

Ah, now I understand, I was a bit confused, as the same code generates 
the central directory entry as the local entry.

>> @@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
>>  	copy_le16(dirent.comment_length, 0);
>>  	copy_le16(dirent.disk, 0);
>>  	copy_le32(dirent.attr2, attr2);
>> -	copy_le32(dirent.offset, zip_offset);
>> +	copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU, 
>> &clamped));
>
> I don't see any provisions to write the zip64 extra header in the central 
> directory when this offset is clamped. This means that ZIP archives whose 
> size exceed 4GB are still unsupported.

The clamped flag will trigger the inclusion of the zip64 central 
directory, which contains the 64-bit offset. Should the central 
directory entry also have the zip64 extra field?

> These are wrong, I think. Entries that did not overflow must be omitted 
> entirely from the zip64 extra record, not filled with 0. This implies that 
> the payload size (.extra_size) is dynamic.

That is what I was trying to figure out, APPNOTE is extremely vague on 
the subject, but thinking back I recall that you are correct.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-23 14:51         ` Peter Krefting
@ 2017-04-23 19:49           ` Johannes Sixt
  2017-04-24  8:04             ` Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: Johannes Sixt @ 2017-04-23 19:49 UTC (permalink / raw)
  To: Peter Krefting; +Cc: René Scharfe, git, Keith Goldfarb

Am 23.04.2017 um 16:51 schrieb Peter Krefting:
> Johannes Sixt:
>>> @@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args
>>> *args,
>>>      copy_le16(dirent.comment_length, 0);
>>>      copy_le16(dirent.disk, 0);
>>>      copy_le32(dirent.attr2, attr2);
>>> -    copy_le32(dirent.offset, zip_offset);
>>> +    copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU,
>>> &clamped));
>>
>> I don't see any provisions to write the zip64 extra header in the
>> central directory when this offset is clamped. This means that ZIP
>> archives whose size exceed 4GB are still unsupported.
>
> The clamped flag will trigger the inclusion of the zip64 central
> directory, which contains the 64-bit offset. Should the central
> directory entry also have the zip64 extra field?

There is no "zip64 central directory". There is a "zip64 end of central 
directory record"; it tells where to find the "central directory" in 
case that the ZIP archive exceeds 4GB. The central directory has the 
same format in a non-zip64 and a zip64 archive. But when size, 
compressed size, and offset overflow 4GB, it uses the same zip64 extra 
record like the local header records, except that it has to record an 
offset in addition to the uncompressed and compressed sizes.

The uncompressed and compressed sizes of entries are mentioned in both 
the central directory and the individual local headers. I think that the 
central directory's values are authorative; my reasoning is that it is 
possible that the local header can have a bit set that tells that the 
local header's values size values are garbage.

In summary, yes, when the central directory is constructed, it must use 
the zip64 extra record to note the values of uncompressed size, 
compressed size, and the offset to the local header when they overflow 4GB.

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-23 19:49           ` Johannes Sixt
@ 2017-04-24  8:04             ` Peter Krefting
  2017-04-24 12:04               ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-24  8:04 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: René Scharfe, git, Keith Goldfarb

Johannes Sixt:

> There is no "zip64 central directory". There is a "zip64 end of central 
> directory record";

Not strange that I was confused and couldn't find it, then... :-)

All right, I need to fix up my patch to make sure I add the zip64 
extra record to both the central directory entry and to the local 
header, and make sure to trigger the zip64 end of central directory 
whenever the zip file is large enough to warrant one.

> In summary, yes, when the central directory is constructed, it must 
> use the zip64 extra record to note the values of uncompressed size, 
> compressed size, and the offset to the local header when they 
> overflow 4GB.

At least that makes it easier to construct, as we only have one 
central directory and can just extend the records that need extending.

Will fix soon.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] archive-zip: Add zip64 headers when file size is too large for 32 bits
  2017-04-24  8:04             ` Peter Krefting
@ 2017-04-24 12:04               ` René Scharfe
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-24 12:04 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Am 24.04.2017 um 10:04 schrieb Peter Krefting:
> Johannes Sixt:
> 
>> There is no "zip64 central directory". There is a "zip64 end of 
>> central directory record";
> 
> Not strange that I was confused and couldn't find it, then... :-)
> 
> All right, I need to fix up my patch to make sure I add the zip64 extra 
> record to both the central directory entry and to the local header, and 
> make sure to trigger the zip64 end of central directory whenever the zip 
> file is large enough to warrant one.
> 
>> In summary, yes, when the central directory is constructed, it must 
>> use the zip64 extra record to note the values of uncompressed size, 
>> compressed size, and the offset to the local header when they overflow 
>> 4GB.
> 
> At least that makes it easier to construct, as we only have one central 
> directory and can just extend the records that need extending.
> 
> Will fix soon.

I have a few patches for that as well.  Testing in particular is a bit 
tricky -- how to avoid creating multi-GB files just for this small 
feature?  I hope to have something to show later today.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-24 12:04               ` René Scharfe
@ 2017-04-24 17:22                 ` René Scharfe
  2017-04-24 17:29                   ` [PATCH v3 1/5] archive-zip: add tests for big ZIP archives René Scharfe
                                     ` (5 more replies)
  0 siblings, 6 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:22 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

The first patch adds (expensive) tests, the next two are cleanups which
set the stage for the remaining two to actually implement zip64 support
for offsets and file sizes.

Half of the series had been laying around for months, half-finished and
forgotten because I got distracted by the holiday season. :-/

  archive-zip: add tests for big ZIP archives
  archive-zip: use strbuf for ZIP directory
  archive-zip: write ZIP dir entry directly to strbuf
  archive-zip: support archives bigger than 4GB
  archive-zip: support files bigger than 4GB

 archive-zip.c                   | 211 ++++++++++++++++++++++++----------------
 t/t5004-archive-corner-cases.sh |  45 +++++++++
 t/t5004/big-pack.zip            | Bin 0 -> 7373 bytes
 3 files changed, 172 insertions(+), 84 deletions(-)
 create mode 100644 t/t5004/big-pack.zip

-- 
2.12.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH v3 1/5] archive-zip: add tests for big ZIP archives
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
@ 2017-04-24 17:29                   ` René Scharfe
  2017-04-24 17:30                   ` [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory René Scharfe
                                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:29 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Test the creation of ZIP archives bigger than 4GB and containing files
bigger than 4GB.  They are marked as EXPENSIVE because they take quite a
while and because the first one needs a bit more than 4GB of disk space
to store the resulting archive.

The big archive in the first test is made up of a tree containing
thousands of copies of a small file.  Yet the test has to write out the
full archive because unzip doesn't offer a way to read from stdin.

The big file in the second test is provided as a zipped pack file to
avoid writing another 4GB file to disk and then adding it.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 t/t5004-archive-corner-cases.sh |  45 ++++++++++++++++++++++++++++++++++++++++
 t/t5004/big-pack.zip            | Bin 0 -> 7373 bytes
 2 files changed, 45 insertions(+)
 create mode 100644 t/t5004/big-pack.zip

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index cca23383c5..bc052c803a 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -155,4 +155,49 @@ test_expect_success ZIPINFO 'zip archive with many entries' '
 	test_cmp expect actual
 '
 
+test_expect_failure EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
+	# build string containing 65536 characters
+	s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef &&
+	s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
+	s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
+
+	# create blob with a length of 65536 + 1 bytes
+	blob=$(echo $s | git hash-object -w --stdin) &&
+
+	# create tree containing 65500 entries of that blob
+	for i in $(test_seq 1 65500)
+	do
+		echo "100644 blob $blob	$i"
+	done >tree &&
+	tree=$(git mktree <tree) &&
+
+	# zip it, creating an archive a bit bigger than 4GB
+	git archive -0 -o many-big.zip $tree &&
+
+	"$GIT_UNZIP" -t many-big.zip 9999 65500 &&
+	"$GIT_UNZIP" -t many-big.zip
+'
+
+test_expect_failure EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '
+	# Pack created with:
+	#   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
+	mkdir -p .git/objects/pack &&
+	(
+		cd .git/objects/pack &&
+		"$GIT_UNZIP" "$TEST_DIRECTORY"/t5004/big-pack.zip
+	) &&
+	blob=754a93d6fada4c6873360e6cb4b209132271ab0e &&
+	size=$(expr 4100 "*" 1024 "*" 1024) &&
+
+	# create a tree containing the file
+	tree=$(echo "100644 blob $blob	big-file" | git mktree) &&
+
+	# zip it, creating an archive with a file bigger than 4GB
+	git archive -o big.zip $tree &&
+
+	"$GIT_UNZIP" -t big.zip &&
+	"$ZIPINFO" big.zip >big.lst &&
+	grep $size big.lst
+'
+
 test_done
diff --git a/t/t5004/big-pack.zip b/t/t5004/big-pack.zip
new file mode 100644
index 0000000000000000000000000000000000000000..caaf614eeece6f818c525e433561e37560a75b05
GIT binary patch
literal 7373
zcmWIWW@Zs#U}E54xLY#A%SmVLl4u471|Jp%215oJhJwW8Y+ZvC^HlQ`3(KT5gS6zd
z#6$y=#3VyYlN6KGlw?bTG()50MB`LTb5p&{l#0+0P6p<OAOA*xaA^fM10%}|W(Ec@
z@jL#}{4)m*95`aY)ux;vb9C_xiI`WWKX0v%wv#-XR%01$R>-9-y6#!_6|EcGQyxED
zyZY^&^YwM2r?Wic_gf3wz0#?R{8hB^<Js4()@9Ms>$gt{iHiUEJN{iALjZ~|R(VzA
zOp{_@IDE*S!H8sEfc%Wl8*lHd?-DJPX@5BLD~n)>se}uQ{sHa{9CBhhi*hb3*-*jg
zc5o4&4%_XW4Ba<OG7P)VrhVaOTdmN<Ho4-C?R3NUpAYE!{5{5hc=zr*JFR#QUppur
z?|(T<>b?Enk9jiu`<7qbA8y|-Ck>2+hMBYN-pgCcFt>ev-5*|-&jb`JE>Hh`B*~cT
z&t2=?C44}E8GDaU*PD0ekK%{l7w@(f14RzJnfvea+fQlS75nRoU$s?(g=&R(fOLb%
zK_JPHAqeJ(fjJ$>oKcz4&>2l3kc=^!7e@2KXkHl23k(gTVK5p7z>;7z9gKznQp0()
zeK6WS7;PVn){Ud}!f4$%I)=h9I*!CJ8V10UU^E?!h5@KqG@1@Z!(cQWK$^#7<%OS{
z_HQoT!K!n;di%NDON?i(ygj-((dOTWgOj)4mXFhko42#<@9S6BTc6*5|Cc?$n~_P5
z8P`mn1SleafRRC^5k!+Qug40R*F&4rL$?-n>J8c2V<cM(nTW$>FDo0!BTPW}9!Q@6
I&6hC%0B76lcmMzZ

literal 0
HcmV?d00001

-- 
2.12.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
  2017-04-24 17:29                   ` [PATCH v3 1/5] archive-zip: add tests for big ZIP archives René Scharfe
@ 2017-04-24 17:30                   ` René Scharfe
  2017-04-25  4:51                     ` Junio C Hamano
  2017-04-24 17:31                   ` [PATCH v3 3/5] archive-zip: write ZIP dir entry directly to strbuf René Scharfe
                                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:30 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Keep the ZIP central director, which is written after all archive
entries, in a strbuf instead of a custom-managed buffer.  It contains
binary data, so we can't (and don't want to) use the full range of
strbuf functions and we don't need the terminating NUL, but the result
is shorter and simpler code.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 archive-zip.c | 36 +++++++++++-------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index b429a8d974..a6fac59602 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -11,16 +11,14 @@
 static int zip_date;
 static int zip_time;
 
-static unsigned char *zip_dir;
-static unsigned int zip_dir_size;
+/* We only care about the "buf" part here. */
+static struct strbuf zip_dir;
 
 static unsigned int zip_offset;
-static unsigned int zip_dir_offset;
 static uint64_t zip_dir_entries;
 
 static unsigned int max_creator_version;
 
-#define ZIP_DIRECTORY_MIN_SIZE	(1024 * 1024)
 #define ZIP_STREAM	(1 <<  3)
 #define ZIP_UTF8	(1 << 11)
 
@@ -268,7 +266,6 @@ static int write_zip_entry(struct archiver_args *args,
 	unsigned long attr2;
 	unsigned long compressed_size;
 	unsigned long crc;
-	unsigned long direntsize;
 	int method;
 	unsigned char *out;
 	void *deflated = NULL;
@@ -356,13 +353,6 @@ static int write_zip_entry(struct archiver_args *args,
 	extra.flags[0] = 1;	/* just mtime */
 	copy_le32(extra.mtime, args->time);
 
-	/* make sure we have enough free space in the dictionary */
-	direntsize = ZIP_DIR_HEADER_SIZE + pathlen + ZIP_EXTRA_MTIME_SIZE;
-	while (zip_dir_size < zip_dir_offset + direntsize) {
-		zip_dir_size += ZIP_DIRECTORY_MIN_SIZE;
-		zip_dir = xrealloc(zip_dir, zip_dir_size);
-	}
-
 	copy_le32(dirent.magic, 0x02014b50);
 	copy_le16(dirent.creator_version, creator_version);
 	copy_le16(dirent.version, 10);
@@ -486,12 +476,9 @@ static int write_zip_entry(struct archiver_args *args,
 
 	copy_le16(dirent.attr1, !is_binary);
 
-	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
-	zip_dir_offset += ZIP_DIR_HEADER_SIZE;
-	memcpy(zip_dir + zip_dir_offset, path, pathlen);
-	zip_dir_offset += pathlen;
-	memcpy(zip_dir + zip_dir_offset, &extra, ZIP_EXTRA_MTIME_SIZE);
-	zip_dir_offset += ZIP_EXTRA_MTIME_SIZE;
+	strbuf_add(&zip_dir, &dirent, ZIP_DIR_HEADER_SIZE);
+	strbuf_add(&zip_dir, path, pathlen);
+	strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE);
 	zip_dir_entries++;
 
 	return 0;
@@ -510,12 +497,12 @@ static void write_zip64_trailer(void)
 	copy_le32(trailer64.directory_start_disk, 0);
 	copy_le64(trailer64.entries_on_this_disk, zip_dir_entries);
 	copy_le64(trailer64.entries, zip_dir_entries);
-	copy_le64(trailer64.size, zip_dir_offset);
+	copy_le64(trailer64.size, zip_dir.len);
 	copy_le64(trailer64.offset, zip_offset);
 
 	copy_le32(locator64.magic, 0x07064b50);
 	copy_le32(locator64.disk, 0);
-	copy_le64(locator64.offset, zip_offset + zip_dir_offset);
+	copy_le64(locator64.offset, zip_offset + zip_dir.len);
 	copy_le32(locator64.number_of_disks, 1);
 
 	write_or_die(1, &trailer64, ZIP64_DIR_TRAILER_SIZE);
@@ -533,11 +520,11 @@ static void write_zip_trailer(const unsigned char *sha1)
 	copy_le16_clamp(trailer.entries_on_this_disk, zip_dir_entries,
 			&clamped);
 	copy_le16_clamp(trailer.entries, zip_dir_entries, &clamped);
-	copy_le32(trailer.size, zip_dir_offset);
+	copy_le32(trailer.size, zip_dir.len);
 	copy_le32(trailer.offset, zip_offset);
 	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);
 
-	write_or_die(1, zip_dir, zip_dir_offset);
+	write_or_die(1, zip_dir.buf, zip_dir.len);
 	if (clamped)
 		write_zip64_trailer();
 	write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
@@ -568,14 +555,13 @@ static int write_zip_archive(const struct archiver *ar,
 
 	dos_time(&args->time, &zip_date, &zip_time);
 
-	zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
-	zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
+	strbuf_init(&zip_dir, 0);
 
 	err = write_archive_entries(args, write_zip_entry);
 	if (!err)
 		write_zip_trailer(args->commit_sha1);
 
-	free(zip_dir);
+	strbuf_release(&zip_dir);
 
 	return err;
 }
-- 
2.12.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v3 3/5] archive-zip: write ZIP dir entry directly to strbuf
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
  2017-04-24 17:29                   ` [PATCH v3 1/5] archive-zip: add tests for big ZIP archives René Scharfe
  2017-04-24 17:30                   ` [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory René Scharfe
@ 2017-04-24 17:31                   ` René Scharfe
  2017-04-24 17:32                   ` [PATCH v3 4/5] archive-zip: support archives bigger than 4GB René Scharfe
                                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:31 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Write all fields of the ZIP directory record for an archive entry
in the right order directly into the strbuf instead of taking a detour
through a struct.  Do that at end, when we have all necessary data like
checksum and compressed size.  The fields are documented just as well,
the code becomes shorter and we save an extra copy.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 archive-zip.c | 81 ++++++++++++++++++++---------------------------------------
 1 file changed, 27 insertions(+), 54 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index a6fac59602..2d52bb3ade 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -45,27 +45,6 @@ struct zip_data_desc {
 	unsigned char _end[1];
 };
 
-struct zip_dir_header {
-	unsigned char magic[4];
-	unsigned char creator_version[2];
-	unsigned char version[2];
-	unsigned char flags[2];
-	unsigned char compression_method[2];
-	unsigned char mtime[2];
-	unsigned char mdate[2];
-	unsigned char crc32[4];
-	unsigned char compressed_size[4];
-	unsigned char size[4];
-	unsigned char filename_length[2];
-	unsigned char extra_length[2];
-	unsigned char comment_length[2];
-	unsigned char disk[2];
-	unsigned char attr1[2];
-	unsigned char attr2[4];
-	unsigned char offset[4];
-	unsigned char _end[1];
-};
-
 struct zip_dir_trailer {
 	unsigned char magic[4];
 	unsigned char disk[2];
@@ -166,6 +145,15 @@ static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped)
 	copy_le16(dest, clamp_max(n, 0xffff, clamped));
 }
 
+static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n)
+{
+	while (size-- > 0) {
+		strbuf_addch(sb, n & 0xff);
+		n >>= 8;
+	}
+	return -!!n;
+}
+
 static void *zlib_deflate_raw(void *data, unsigned long size,
 			      int compression_level,
 			      unsigned long *compressed_size)
@@ -212,16 +200,6 @@ static void write_zip_data_desc(unsigned long size,
 	write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE);
 }
 
-static void set_zip_dir_data_desc(struct zip_dir_header *header,
-				  unsigned long size,
-				  unsigned long compressed_size,
-				  unsigned long crc)
-{
-	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
-}
-
 static void set_zip_header_data_desc(struct zip_local_header *header,
 				     unsigned long size,
 				     unsigned long compressed_size,
@@ -261,7 +239,7 @@ static int write_zip_entry(struct archiver_args *args,
 			   unsigned int mode)
 {
 	struct zip_local_header header;
-	struct zip_dir_header dirent;
+	uintmax_t offset = zip_offset;
 	struct zip_extra_mtime extra;
 	unsigned long attr2;
 	unsigned long compressed_size;
@@ -353,21 +331,6 @@ static int write_zip_entry(struct archiver_args *args,
 	extra.flags[0] = 1;	/* just mtime */
 	copy_le32(extra.mtime, args->time);
 
-	copy_le32(dirent.magic, 0x02014b50);
-	copy_le16(dirent.creator_version, creator_version);
-	copy_le16(dirent.version, 10);
-	copy_le16(dirent.flags, flags);
-	copy_le16(dirent.compression_method, method);
-	copy_le16(dirent.mtime, zip_time);
-	copy_le16(dirent.mdate, zip_date);
-	set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
-	copy_le16(dirent.filename_length, pathlen);
-	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
-	copy_le16(dirent.comment_length, 0);
-	copy_le16(dirent.disk, 0);
-	copy_le32(dirent.attr2, attr2);
-	copy_le32(dirent.offset, zip_offset);
-
 	copy_le32(header.magic, 0x04034b50);
 	copy_le16(header.version, 10);
 	copy_le16(header.flags, flags);
@@ -406,8 +369,6 @@ static int write_zip_entry(struct archiver_args *args,
 
 		write_zip_data_desc(size, compressed_size, crc);
 		zip_offset += ZIP_DATA_DESC_SIZE;
-
-		set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
 	} else if (stream && method == 8) {
 		unsigned char buf[STREAM_BUFFER_SIZE];
 		ssize_t readlen;
@@ -464,8 +425,6 @@ static int write_zip_entry(struct archiver_args *args,
 
 		write_zip_data_desc(size, compressed_size, crc);
 		zip_offset += ZIP_DATA_DESC_SIZE;
-
-		set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
 	} else if (compressed_size > 0) {
 		write_or_die(1, out, compressed_size);
 		zip_offset += compressed_size;
@@ -474,9 +433,23 @@ static int write_zip_entry(struct archiver_args *args,
 	free(deflated);
 	free(buffer);
 
-	copy_le16(dirent.attr1, !is_binary);
-
-	strbuf_add(&zip_dir, &dirent, ZIP_DIR_HEADER_SIZE);
+	strbuf_add_le(&zip_dir, 4, 0x02014b50);	/* magic */
+	strbuf_add_le(&zip_dir, 2, creator_version);
+	strbuf_add_le(&zip_dir, 2, 10);		/* version */
+	strbuf_add_le(&zip_dir, 2, flags);
+	strbuf_add_le(&zip_dir, 2, method);
+	strbuf_add_le(&zip_dir, 2, zip_time);
+	strbuf_add_le(&zip_dir, 2, zip_date);
+	strbuf_add_le(&zip_dir, 4, crc);
+	strbuf_add_le(&zip_dir, 4, compressed_size);
+	strbuf_add_le(&zip_dir, 4, size);
+	strbuf_add_le(&zip_dir, 2, pathlen);
+	strbuf_add_le(&zip_dir, 2, ZIP_EXTRA_MTIME_SIZE);
+	strbuf_add_le(&zip_dir, 2, 0);		/* comment length */
+	strbuf_add_le(&zip_dir, 2, 0);		/* disk */
+	strbuf_add_le(&zip_dir, 2, !is_binary);
+	strbuf_add_le(&zip_dir, 4, attr2);
+	strbuf_add_le(&zip_dir, 4, offset);
 	strbuf_add(&zip_dir, path, pathlen);
 	strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE);
 	zip_dir_entries++;
-- 
2.12.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
                                     ` (2 preceding siblings ...)
  2017-04-24 17:31                   ` [PATCH v3 3/5] archive-zip: write ZIP dir entry directly to strbuf René Scharfe
@ 2017-04-24 17:32                   ` René Scharfe
  2017-04-24 18:24                     ` Peter Krefting
  2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
  2017-04-29 21:00                   ` [PATCH v3 0/5] archive-zip: support files and archives " Torsten Bögershausen
  5 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:32 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Add a zip64 extended information extra field to the central directory
and emit the zip64 end of central directory records as well as locator
if the offset of an entry within the archive exceeds 4GB.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 archive-zip.c                   | 32 ++++++++++++++++++++++++++++----
 t/t5004-archive-corner-cases.sh |  2 +-
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index 2d52bb3ade..7d6f2a85d0 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -14,7 +14,7 @@ static int zip_time;
 /* We only care about the "buf" part here. */
 static struct strbuf zip_dir;
 
-static unsigned int zip_offset;
+static uintmax_t zip_offset;
 static uint64_t zip_dir_entries;
 
 static unsigned int max_creator_version;
@@ -145,6 +145,11 @@ static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped)
 	copy_le16(dest, clamp_max(n, 0xffff, clamped));
 }
 
+static void copy_le32_clamp(unsigned char *dest, uint64_t n, int *clamped)
+{
+	copy_le32(dest, clamp_max(n, 0xffffffff, clamped));
+}
+
 static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n)
 {
 	while (size-- > 0) {
@@ -154,6 +159,12 @@ static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n)
 	return -!!n;
 }
 
+static uint32_t clamp32(uintmax_t n)
+{
+	const uintmax_t max = 0xffffffff;
+	return (n < max) ? n : max;
+}
+
 static void *zlib_deflate_raw(void *data, unsigned long size,
 			      int compression_level,
 			      unsigned long *compressed_size)
@@ -254,6 +265,8 @@ static int write_zip_entry(struct archiver_args *args,
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
 	unsigned int creator_version = 0;
+	size_t zip_dir_extra_size = ZIP_EXTRA_MTIME_SIZE;
+	size_t zip64_dir_extra_payload_size = 0;
 
 	crc = crc32(0, NULL, 0);
 
@@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args *args,
 	free(deflated);
 	free(buffer);
 
+	if (offset > 0xffffffff) {
+		zip64_dir_extra_payload_size += 8;
+		zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
+	}
+
 	strbuf_add_le(&zip_dir, 4, 0x02014b50);	/* magic */
 	strbuf_add_le(&zip_dir, 2, creator_version);
 	strbuf_add_le(&zip_dir, 2, 10);		/* version */
@@ -444,14 +462,20 @@ static int write_zip_entry(struct archiver_args *args,
 	strbuf_add_le(&zip_dir, 4, compressed_size);
 	strbuf_add_le(&zip_dir, 4, size);
 	strbuf_add_le(&zip_dir, 2, pathlen);
-	strbuf_add_le(&zip_dir, 2, ZIP_EXTRA_MTIME_SIZE);
+	strbuf_add_le(&zip_dir, 2, zip_dir_extra_size);
 	strbuf_add_le(&zip_dir, 2, 0);		/* comment length */
 	strbuf_add_le(&zip_dir, 2, 0);		/* disk */
 	strbuf_add_le(&zip_dir, 2, !is_binary);
 	strbuf_add_le(&zip_dir, 4, attr2);
-	strbuf_add_le(&zip_dir, 4, offset);
+	strbuf_add_le(&zip_dir, 4, clamp32(offset));
 	strbuf_add(&zip_dir, path, pathlen);
 	strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE);
+	if (zip64_dir_extra_payload_size) {
+		strbuf_add_le(&zip_dir, 2, 0x0001);	/* magic */
+		strbuf_add_le(&zip_dir, 2, zip64_dir_extra_payload_size);
+		if (offset >= 0xffffffff)
+			strbuf_add_le(&zip_dir, 8, offset);
+	}
 	zip_dir_entries++;
 
 	return 0;
@@ -494,7 +518,7 @@ static void write_zip_trailer(const unsigned char *sha1)
 			&clamped);
 	copy_le16_clamp(trailer.entries, zip_dir_entries, &clamped);
 	copy_le32(trailer.size, zip_dir.len);
-	copy_le32(trailer.offset, zip_offset);
+	copy_le32_clamp(trailer.offset, zip_offset, &clamped);
 	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);
 
 	write_or_die(1, zip_dir.buf, zip_dir.len);
diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index bc052c803a..0ac94b5cc9 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -155,7 +155,7 @@ test_expect_success ZIPINFO 'zip archive with many entries' '
 	test_cmp expect actual
 '
 
-test_expect_failure EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
+test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
 	# build string containing 65536 characters
 	s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef &&
 	s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
-- 
2.12.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v3 5/5] archive-zip: support files bigger than 4GB
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
                                     ` (3 preceding siblings ...)
  2017-04-24 17:32                   ` [PATCH v3 4/5] archive-zip: support archives bigger than 4GB René Scharfe
@ 2017-04-24 17:33                   ` René Scharfe
  2017-04-24 21:11                     ` Keith Goldfarb
  2017-04-25  4:46                     ` Junio C Hamano
  2017-04-29 21:00                   ` [PATCH v3 0/5] archive-zip: support files and archives " Torsten Bögershausen
  5 siblings, 2 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 17:33 UTC (permalink / raw)
  To: Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

Write a zip64 extended information extra field for big files as part of
their local headers and as part of their central directory headers.
Also write a zip64 version of the data descriptor in that case.

If we're streaming then we don't know the compressed size at the time we
write the header.  Deflate can end up making a file bigger instead of
smaller if we're unlucky.  Write a local zip64 header already for files
with a size of 2GB or more in this case to be on the safe side.

Both sizes need to be included in the local zip64 header, but the extra
field for the directory must only contain 64-bit equivalents for 32-bit
values of 0xffffffff.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 archive-zip.c                   | 90 ++++++++++++++++++++++++++++++++++-------
 t/t5004-archive-corner-cases.sh |  2 +-
 2 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index 7d6f2a85d0..44ed78f163 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -45,6 +45,14 @@ struct zip_data_desc {
 	unsigned char _end[1];
 };
 
+struct zip64_data_desc {
+	unsigned char magic[4];
+	unsigned char crc32[4];
+	unsigned char compressed_size[8];
+	unsigned char size[8];
+	unsigned char _end[1];
+};
+
 struct zip_dir_trailer {
 	unsigned char magic[4];
 	unsigned char disk[2];
@@ -65,6 +73,14 @@ struct zip_extra_mtime {
 	unsigned char _end[1];
 };
 
+struct zip64_extra {
+	unsigned char magic[2];
+	unsigned char extra_size[2];
+	unsigned char size[8];
+	unsigned char compressed_size[8];
+	unsigned char _end[1];
+};
+
 struct zip64_dir_trailer {
 	unsigned char magic[4];
 	unsigned char record_size[8];
@@ -94,11 +110,15 @@ struct zip64_dir_trailer_locator {
  */
 #define ZIP_LOCAL_HEADER_SIZE	offsetof(struct zip_local_header, _end)
 #define ZIP_DATA_DESC_SIZE	offsetof(struct zip_data_desc, _end)
+#define ZIP64_DATA_DESC_SIZE	offsetof(struct zip64_data_desc, _end)
 #define ZIP_DIR_HEADER_SIZE	offsetof(struct zip_dir_header, _end)
 #define ZIP_DIR_TRAILER_SIZE	offsetof(struct zip_dir_trailer, _end)
 #define ZIP_EXTRA_MTIME_SIZE	offsetof(struct zip_extra_mtime, _end)
 #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \
 	(ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags))
+#define ZIP64_EXTRA_SIZE	offsetof(struct zip64_extra, _end)
+#define ZIP64_EXTRA_PAYLOAD_SIZE \
+	(ZIP64_EXTRA_SIZE - offsetof(struct zip64_extra, size))
 #define ZIP64_DIR_TRAILER_SIZE	offsetof(struct zip64_dir_trailer, _end)
 #define ZIP64_DIR_TRAILER_RECORD_SIZE \
 	(ZIP64_DIR_TRAILER_SIZE - \
@@ -202,13 +222,23 @@ static void write_zip_data_desc(unsigned long size,
 				unsigned long compressed_size,
 				unsigned long crc)
 {
-	struct zip_data_desc trailer;
-
-	copy_le32(trailer.magic, 0x08074b50);
-	copy_le32(trailer.crc32, crc);
-	copy_le32(trailer.compressed_size, compressed_size);
-	copy_le32(trailer.size, size);
-	write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE);
+	if (size >= 0xffffffff || compressed_size >= 0xffffffff) {
+		struct zip64_data_desc trailer;
+		copy_le32(trailer.magic, 0x08074b50);
+		copy_le32(trailer.crc32, crc);
+		copy_le64(trailer.compressed_size, compressed_size);
+		copy_le64(trailer.size, size);
+		write_or_die(1, &trailer, ZIP64_DATA_DESC_SIZE);
+		zip_offset += ZIP64_DATA_DESC_SIZE;
+	} else {
+		struct zip_data_desc trailer;
+		copy_le32(trailer.magic, 0x08074b50);
+		copy_le32(trailer.crc32, crc);
+		copy_le32(trailer.compressed_size, compressed_size);
+		copy_le32(trailer.size, size);
+		write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE);
+		zip_offset += ZIP_DATA_DESC_SIZE;
+	}
 }
 
 static void set_zip_header_data_desc(struct zip_local_header *header,
@@ -252,6 +282,9 @@ static int write_zip_entry(struct archiver_args *args,
 	struct zip_local_header header;
 	uintmax_t offset = zip_offset;
 	struct zip_extra_mtime extra;
+	struct zip64_extra extra64;
+	size_t header_extra_size = ZIP_EXTRA_MTIME_SIZE;
+	int need_zip64_extra = 0;
 	unsigned long attr2;
 	unsigned long compressed_size;
 	unsigned long crc;
@@ -344,21 +377,40 @@ static int write_zip_entry(struct archiver_args *args,
 	extra.flags[0] = 1;	/* just mtime */
 	copy_le32(extra.mtime, args->time);
 
+	if (size > 0xffffffff || compressed_size > 0xffffffff)
+		need_zip64_extra = 1;
+	if (stream && size > 0x7fffffff)
+		need_zip64_extra = 1;
+
 	copy_le32(header.magic, 0x04034b50);
 	copy_le16(header.version, 10);
 	copy_le16(header.flags, flags);
 	copy_le16(header.compression_method, method);
 	copy_le16(header.mtime, zip_time);
 	copy_le16(header.mdate, zip_date);
-	set_zip_header_data_desc(&header, size, compressed_size, crc);
+	if (need_zip64_extra) {
+		set_zip_header_data_desc(&header, 0xffffffff, 0xffffffff, crc);
+		header_extra_size += ZIP64_EXTRA_SIZE;
+	} else {
+		set_zip_header_data_desc(&header, size, compressed_size, crc);
+	}
 	copy_le16(header.filename_length, pathlen);
-	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
+	copy_le16(header.extra_length, header_extra_size);
 	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
 	zip_offset += ZIP_LOCAL_HEADER_SIZE;
 	write_or_die(1, path, pathlen);
 	zip_offset += pathlen;
 	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
 	zip_offset += ZIP_EXTRA_MTIME_SIZE;
+	if (need_zip64_extra) {
+		copy_le16(extra64.magic, 0x0001);
+		copy_le16(extra64.extra_size, ZIP64_EXTRA_PAYLOAD_SIZE);
+		copy_le64(extra64.size, size);
+		copy_le64(extra64.compressed_size, compressed_size);
+		write_or_die(1, &extra64, ZIP64_EXTRA_SIZE);
+		zip_offset += ZIP64_EXTRA_SIZE;
+	}
+
 	if (stream && method == 0) {
 		unsigned char buf[STREAM_BUFFER_SIZE];
 		ssize_t readlen;
@@ -381,7 +433,6 @@ static int write_zip_entry(struct archiver_args *args,
 		zip_offset += compressed_size;
 
 		write_zip_data_desc(size, compressed_size, crc);
-		zip_offset += ZIP_DATA_DESC_SIZE;
 	} else if (stream && method == 8) {
 		unsigned char buf[STREAM_BUFFER_SIZE];
 		ssize_t readlen;
@@ -437,7 +488,6 @@ static int write_zip_entry(struct archiver_args *args,
 		zip_offset += compressed_size;
 
 		write_zip_data_desc(size, compressed_size, crc);
-		zip_offset += ZIP_DATA_DESC_SIZE;
 	} else if (compressed_size > 0) {
 		write_or_die(1, out, compressed_size);
 		zip_offset += compressed_size;
@@ -446,8 +496,14 @@ static int write_zip_entry(struct archiver_args *args,
 	free(deflated);
 	free(buffer);
 
-	if (offset > 0xffffffff) {
-		zip64_dir_extra_payload_size += 8;
+	if (compressed_size > 0xffffffff || size > 0xffffffff ||
+	    offset > 0xffffffff) {
+		if (compressed_size >= 0xffffffff)
+			zip64_dir_extra_payload_size += 8;
+		if (size >= 0xffffffff)
+			zip64_dir_extra_payload_size += 8;
+		if (offset >= 0xffffffff)
+			zip64_dir_extra_payload_size += 8;
 		zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
 	}
 
@@ -459,8 +515,8 @@ static int write_zip_entry(struct archiver_args *args,
 	strbuf_add_le(&zip_dir, 2, zip_time);
 	strbuf_add_le(&zip_dir, 2, zip_date);
 	strbuf_add_le(&zip_dir, 4, crc);
-	strbuf_add_le(&zip_dir, 4, compressed_size);
-	strbuf_add_le(&zip_dir, 4, size);
+	strbuf_add_le(&zip_dir, 4, clamp32(compressed_size));
+	strbuf_add_le(&zip_dir, 4, clamp32(size));
 	strbuf_add_le(&zip_dir, 2, pathlen);
 	strbuf_add_le(&zip_dir, 2, zip_dir_extra_size);
 	strbuf_add_le(&zip_dir, 2, 0);		/* comment length */
@@ -473,6 +529,10 @@ static int write_zip_entry(struct archiver_args *args,
 	if (zip64_dir_extra_payload_size) {
 		strbuf_add_le(&zip_dir, 2, 0x0001);	/* magic */
 		strbuf_add_le(&zip_dir, 2, zip64_dir_extra_payload_size);
+		if (size >= 0xffffffff)
+			strbuf_add_le(&zip_dir, 8, size);
+		if (compressed_size >= 0xffffffff)
+			strbuf_add_le(&zip_dir, 8, compressed_size);
 		if (offset >= 0xffffffff)
 			strbuf_add_le(&zip_dir, 8, offset);
 	}
diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index 0ac94b5cc9..a6875dfdb1 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -178,7 +178,7 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
 	"$GIT_UNZIP" -t many-big.zip
 '
 
-test_expect_failure EXPENSIVE,UNZIP 'zip archive with files bigger than 4GB' '
+test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '
 	# Pack created with:
 	#   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
 	mkdir -p .git/objects/pack &&
-- 
2.12.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 17:32                   ` [PATCH v3 4/5] archive-zip: support archives bigger than 4GB René Scharfe
@ 2017-04-24 18:24                     ` Peter Krefting
  2017-04-24 20:06                       ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-24 18:24 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

> @@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args *args,
> 	free(deflated);
> 	free(buffer);
>
> +	if (offset > 0xffffffff) {
> +		zip64_dir_extra_payload_size += 8;
> +		zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
> +	}
> +
> 	strbuf_add_le(&zip_dir, 4, 0x02014b50);	/* magic */
> 	strbuf_add_le(&zip_dir, 2, creator_version);
> 	strbuf_add_le(&zip_dir, 2, 10);		/* version */

This needs to be >=. The spec says that if the value is 0xffffffff, 
there should be a zip64 record with the actual size (even if it is 
0xffffffff).

Also set the version required to 45 (4.5) for any record that has zip64 
fields.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 18:24                     ` Peter Krefting
@ 2017-04-24 20:06                       ` René Scharfe
  2017-04-24 20:39                         ` René Scharfe
                                           ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 20:06 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Johannes Sixt, git, Keith Goldfarb

Am 24.04.2017 um 20:24 schrieb Peter Krefting:
> René Scharfe:
> 
>> @@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args 
>> *args,
>>     free(deflated);
>>     free(buffer);
>>
>> +    if (offset > 0xffffffff) {
>> +        zip64_dir_extra_payload_size += 8;
>> +        zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
>> +    }
>> +
>>     strbuf_add_le(&zip_dir, 4, 0x02014b50);    /* magic */
>>     strbuf_add_le(&zip_dir, 2, creator_version);
>>     strbuf_add_le(&zip_dir, 2, 10);        /* version */
> 
> This needs to be >=. The spec says that if the value is 0xffffffff, 
> there should be a zip64 record with the actual size (even if it is 
> 0xffffffff).

Could you please cite the relevant part?

Here's how I read it: If a value doesn't fit into a 32-bit field it is 
set to 0xffffffff, a zip64 extra is added and a 64-bit field stores the 
actual value.  The magic value 0xffffffff indicates that a corresponding 
64-bit field is present in the zip64 extra.  That means even if a value 
is 0xffffffff (and thus fits) we need to add it to the zip64 extra.  If 
there is no zip64 extra then we can store 0xffffffff in the 32-bit 
field, though.

> Also set the version required to 45 (4.5) for any record that has zip64 
> fields.

Ah, yes indeed.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 20:06                       ` René Scharfe
@ 2017-04-24 20:39                         ` René Scharfe
  2017-04-24 21:02                         ` Johannes Sixt
  2017-04-25  7:55                         ` Peter Krefting
  2 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 20:39 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Johannes Sixt, git, Keith Goldfarb

Am 24.04.2017 um 22:06 schrieb René Scharfe:
> Am 24.04.2017 um 20:24 schrieb Peter Krefting:
>> René Scharfe:
>> Also set the version required to 45 (4.5) for any record that has 
>> zip64 fields.
> 
> Ah, yes indeed.

When I tried to implement this I realized that should set 20 for 
directories, but we use 10 for everything.  Then I compared with InfoZIP 
zip and noticed that they only ever sets 45 for the zip64 end of central 
directory record and 10 for everything else.  So I think we can/should 
keep it as it is, for compatibility's sake -- unless there is a problem 
with an old (but still relevant) extractor.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 20:06                       ` René Scharfe
  2017-04-24 20:39                         ` René Scharfe
@ 2017-04-24 21:02                         ` Johannes Sixt
  2017-04-24 21:41                           ` René Scharfe
  2017-04-25  7:55                         ` Peter Krefting
  2 siblings, 1 reply; 44+ messages in thread
From: Johannes Sixt @ 2017-04-24 21:02 UTC (permalink / raw)
  To: René Scharfe; +Cc: Peter Krefting, git, Keith Goldfarb

Am 24.04.2017 um 22:06 schrieb René Scharfe:
> Am 24.04.2017 um 20:24 schrieb Peter Krefting:
>> René Scharfe:
>>
>>> @@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args
>>> *args,
>>>     free(deflated);
>>>     free(buffer);
>>>
>>> +    if (offset > 0xffffffff) {
>>> +        zip64_dir_extra_payload_size += 8;
>>> +        zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
>>> +    }
>>> +
>>>     strbuf_add_le(&zip_dir, 4, 0x02014b50);    /* magic */
>>>     strbuf_add_le(&zip_dir, 2, creator_version);
>>>     strbuf_add_le(&zip_dir, 2, 10);        /* version */
>>
>> This needs to be >=. The spec says that if the value is 0xffffffff,
>> there should be a zip64 record with the actual size (even if it is
>> 0xffffffff).
>
> Could you please cite the relevant part?
>
> Here's how I read it: If a value doesn't fit into a 32-bit field it is
> set to 0xffffffff, a zip64 extra is added and a 64-bit field stores the
> actual value.  The magic value 0xffffffff indicates that a corresponding
> 64-bit field is present in the zip64 extra.  That means even if a value
> is 0xffffffff (and thus fits) we need to add it to the zip64 extra.  If
> there is no zip64 extra then we can store 0xffffffff in the 32-bit
> field, though.

The reader I wrote recently interprets 0xffffffff as special if the 
version is 45. Then, if there is no zip64 extra record, it is a broken 
ZIP archive. You are saying that my reader is wrong in this special case...

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 5/5] archive-zip: support files bigger than 4GB
  2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
@ 2017-04-24 21:11                     ` Keith Goldfarb
  2017-04-25  4:46                     ` Junio C Hamano
  1 sibling, 0 replies; 44+ messages in thread
From: Keith Goldfarb @ 2017-04-24 21:11 UTC (permalink / raw)
  To: René Scharfe; +Cc: Peter Krefting, Johannes Sixt, git

This set of patches works for my test case.

Thanks,

K.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 21:02                         ` Johannes Sixt
@ 2017-04-24 21:41                           ` René Scharfe
  0 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-24 21:41 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Peter Krefting, git, Keith Goldfarb

Am 24.04.2017 um 23:02 schrieb Johannes Sixt:
> Am 24.04.2017 um 22:06 schrieb René Scharfe:
>> Am 24.04.2017 um 20:24 schrieb Peter Krefting:
>>> René Scharfe:
>>>
>>>> @@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args
>>>> *args,
>>>>     free(deflated);
>>>>     free(buffer);
>>>>
>>>> +    if (offset > 0xffffffff) {
>>>> +        zip64_dir_extra_payload_size += 8;
>>>> +        zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size;
>>>> +    }
>>>> +
>>>>     strbuf_add_le(&zip_dir, 4, 0x02014b50);    /* magic */
>>>>     strbuf_add_le(&zip_dir, 2, creator_version);
>>>>     strbuf_add_le(&zip_dir, 2, 10);        /* version */
>>>
>>> This needs to be >=. The spec says that if the value is 0xffffffff,
>>> there should be a zip64 record with the actual size (even if it is
>>> 0xffffffff).
>>
>> Could you please cite the relevant part?
>>
>> Here's how I read it: If a value doesn't fit into a 32-bit field it is
>> set to 0xffffffff, a zip64 extra is added and a 64-bit field stores the
>> actual value.  The magic value 0xffffffff indicates that a corresponding
>> 64-bit field is present in the zip64 extra.  That means even if a value
>> is 0xffffffff (and thus fits) we need to add it to the zip64 extra.  If
>> there is no zip64 extra then we can store 0xffffffff in the 32-bit
>> field, though.
> 
> The reader I wrote recently interprets 0xffffffff as special if the 
> version is 45. Then, if there is no zip64 extra record, it is a broken 
> ZIP archive. You are saying that my reader is wrong in this special case...

The "version needed to extract" field is not particularly well-suited
for feature detection.  If you e.g. compress with LZMA then it should be
set to 63 even for small files that need no zip64 extra field.  So it
seems to rather be meant to allow readers to spot headers they can't
interpret because they were written against an earlier version of the
spec.

InfoZIP's zip writes 10 into the version field, no matter if the entry
has a zip64 extra field or not, as I wrote in my other reply.  That
seems to collide with the spec, but I'd expect readers to be able to
handle its output.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 5/5] archive-zip: support files bigger than 4GB
  2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
  2017-04-24 21:11                     ` Keith Goldfarb
@ 2017-04-25  4:46                     ` Junio C Hamano
  2017-04-25  5:27                       ` René Scharfe
  1 sibling, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2017-04-25  4:46 UTC (permalink / raw)
  To: René Scharfe; +Cc: Peter Krefting, Johannes Sixt, git, Keith Goldfarb

René Scharfe <l.s.r@web.de> writes:

> diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
> index 0ac94b5cc9..a6875dfdb1 100755
> --- a/t/t5004-archive-corner-cases.sh
> +++ b/t/t5004-archive-corner-cases.sh
> @@ -178,7 +178,7 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
>  	"$GIT_UNZIP" -t many-big.zip
>  '
>  
> -test_expect_failure EXPENSIVE,UNZIP 'zip archive with files bigger than 4GB' '
> +test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '

This is a bit curious, as 1/5 adds this test that expects a failure
with three prerequisites already.  I'll assume that this is a rebase
glitch and the preimage actually must have ,ZIPINFO there already.

>  	# Pack created with:
>  	#   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
>  	mkdir -p .git/objects/pack &&

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory
  2017-04-24 17:30                   ` [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory René Scharfe
@ 2017-04-25  4:51                     ` Junio C Hamano
  2017-04-25  5:28                       ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2017-04-25  4:51 UTC (permalink / raw)
  To: René Scharfe; +Cc: Peter Krefting, Johannes Sixt, git, Keith Goldfarb

René Scharfe <l.s.r@web.de> writes:

> Keep the ZIP central director, which is written after all archive

Is this typoed "directorY"?  I know there was discussion on the
correct terminology I only skimmed and I am too lazy to go back
there to understand it to answer this question myself, so...

> -#define ZIP_DIRECTORY_MIN_SIZE	(1024 * 1024)

This tells me that there is this thing called ZIP directory, so
probably my guess above is correct ;-)


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 5/5] archive-zip: support files bigger than 4GB
  2017-04-25  4:46                     ` Junio C Hamano
@ 2017-04-25  5:27                       ` René Scharfe
  0 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-25  5:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Peter Krefting, Johannes Sixt, git, Keith Goldfarb

Am 25.04.2017 um 06:46 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
>> index 0ac94b5cc9..a6875dfdb1 100755
>> --- a/t/t5004-archive-corner-cases.sh
>> +++ b/t/t5004-archive-corner-cases.sh
>> @@ -178,7 +178,7 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
>>   	"$GIT_UNZIP" -t many-big.zip
>>   '
>>   
>> -test_expect_failure EXPENSIVE,UNZIP 'zip archive with files bigger than 4GB' '
>> +test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '
> 
> This is a bit curious, as 1/5 adds this test that expects a failure
> with three prerequisites already.  I'll assume that this is a rebase
> glitch and the preimage actually must have ,ZIPINFO there already.

Yes, indeed -- I shouldn't try to do last minute edits.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory
  2017-04-25  4:51                     ` Junio C Hamano
@ 2017-04-25  5:28                       ` René Scharfe
  0 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-25  5:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Peter Krefting, Johannes Sixt, git, Keith Goldfarb

Am 25.04.2017 um 06:51 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> Keep the ZIP central director, which is written after all archive
> 
> Is this typoed "directorY"?  I know there was discussion on the
> correct terminology I only skimmed and I am too lazy to go back
> there to understand it to answer this question myself, so...

Yep, a typo, this patch touches a "directory", as stated in the subject.

The "ZIP central director" would be PKWARE Inc., I guess? ;-)  The patch 
doesn't affect them..

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-24 20:06                       ` René Scharfe
  2017-04-24 20:39                         ` René Scharfe
  2017-04-24 21:02                         ` Johannes Sixt
@ 2017-04-25  7:55                         ` Peter Krefting
  2017-04-25 16:24                           ` René Scharfe
  2 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-25  7:55 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

>> This needs to be >=. The spec says that if the value is 0xffffffff, there 
>> should be a zip64 record with the actual size (even if it is 0xffffffff).
> Could you please cite the relevant part?

4.4.8 compressed size: (4 bytes)
4.4.9 uncompressed size: (4 bytes)

"If an archive is in ZIP64 format and the value in this field is 
0xFFFFFFFF, the size will be in the corresponding 8 byte ZIP64 
extended information extra field."


Of course, there is no definition of how they define that "an archive 
is in ZIP64 format", but I would say that is whenever it has any ZIP64 
structures.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-25  7:55                         ` Peter Krefting
@ 2017-04-25 16:24                           ` René Scharfe
  2017-04-26 21:02                             ` Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-25 16:24 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Johannes Sixt, git, Keith Goldfarb

Am 25.04.2017 um 09:55 schrieb Peter Krefting:
> René Scharfe:
> 
>>> This needs to be >=. The spec says that if the value is 0xffffffff, 
>>> there should be a zip64 record with the actual size (even if it is 
>>> 0xffffffff).
>> Could you please cite the relevant part?
> 
> 4.4.8 compressed size: (4 bytes)
> 4.4.9 uncompressed size: (4 bytes)
> 
> "If an archive is in ZIP64 format and the value in this field is 
> 0xFFFFFFFF, the size will be in the corresponding 8 byte ZIP64 extended 
> information extra field."
> 
> 
> Of course, there is no definition of how they define that "an archive is 
> in ZIP64 format", but I would say that is whenever it has any ZIP64 
> structures.

I struggled with that sentence as well.  There is no explicit "format"
field AFAICS.  The closest at the archive level are zip64 end of central
directory record and locator.  But what really matters is the presence
of a zip64 extended information extra field to hold the 64-bit size
value.

There's also this general note a bit higher up:

       "4.4.1.4  If one of the fields in the end of central directory
       record is too small to hold required data, the field should be
       set to -1 (0xFFFF or 0xFFFFFFFF) and the ZIP64 format record
       should be created."

My interpretation: An archiver that can only emit 32-bit ZIP files
(either because it doesn't support ZIP64 or due to a compatibility
option set by the user) writes 32-bit size fields and has no defined way
to deal with overflows.  An archiver that is allowed to use ZIP64 can
emit zip64 extras as needed.

Or in other words: A legacy ZIP archive and a ZIP64 archive can be
bit-wise the same if all values for all entries fit into the legacy
fields, but the difference in terms of the spec is what the archiver was
allowed to do when it created them.

	# 4-byte sizes, not ZIP64
	arch --format=zip ...

	# ZIP64, can use 8-byte sizes as needed
	arch --format=zip64 ...

Makes sense?

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-25 16:24                           ` René Scharfe
@ 2017-04-26 21:02                             ` Peter Krefting
  2017-04-26 23:38                               ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-26 21:02 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

> I struggled with that sentence as well.  There is no explicit 
> "format" field AFAICS.

Exactly. I interpret that as it is in zip64 format if there are any 
zip64 structures in the archive (especially if there is a zip64 
end of central directory locator).

> Or in other words: A legacy ZIP archive and a ZIP64 archive can be 
> bit-wise the same if all values for all entries fit into the legacy 
> fields, but the difference in terms of the spec is what the archiver 
> was allowed to do when it created them.

As long as all sizes are below (unsigned) -1, then they would be 
identical. If one, and only one, of the sizes are equal to (unsigned) 
-1 (and none overflow), then it is up to intepretation whether or not 
a ZIP64-aware archiver is allowed to output an archive that is not in 
ZIP64 format. If any single size or value overflows the 32 (16) bit 
values, then ZIP64 format is needed.

> 	# 4-byte sizes, not ZIP64
> 	arch --format=zip ...
>
> 	# ZIP64, can use 8-byte sizes as needed
> 	arch --format=zip64 ...
>
> Makes sense?

Well, I would say that it would be a lot easier to always emit zip64 
archives. An old-style unzipper should be able to read them anyway if 
there are no overflowing fields, right? And, besides, who in 2017 has 
an unzip tool that is unable to read zip64? Info-Zip UnZip has 
supported Zip64 since 2009.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-26 21:02                             ` Peter Krefting
@ 2017-04-26 23:38                               ` René Scharfe
  2017-04-27  4:57                                 ` Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-26 23:38 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Johannes Sixt, git, Keith Goldfarb

Am 26.04.2017 um 23:02 schrieb Peter Krefting:
> René Scharfe:
> 
>> I struggled with that sentence as well.  There is no explicit "format" 
>> field AFAICS.
> 
> Exactly. I interpret that as it is in zip64 format if there are any 
> zip64 structures in the archive (especially if there is a zip64 end of 
> central directory locator).

The crucial point is that I think the choice is per entry, i.e. if we
had to write a zip64 record for one file we can still emit a legacy
record for the next file that has a size of 0xffffffff.

>> Or in other words: A legacy ZIP archive and a ZIP64 archive can be 
>> bit-wise the same if all values for all entries fit into the legacy 
>> fields, but the difference in terms of the spec is what the archiver 
>> was allowed to do when it created them.
> 
> As long as all sizes are below (unsigned) -1, then they would be 
> identical. If one, and only one, of the sizes are equal to (unsigned) -1 
> (and none overflow), then it is up to intepretation whether or not a 
> ZIP64-aware archiver is allowed to output an archive that is not in 
> ZIP64 format. If any single size or value overflows the 32 (16) bit 
> values, then ZIP64 format is needed.

Sizes can be stored in zip64 entries even if they are lower (from a
paragraph about the data descriptor):

"4.3.9.2 When compressing files, compressed and uncompressed sizes
       should be stored in ZIP64 format (as 8 byte values) when a
       file's size exceeds 0xFFFFFFFF.   However ZIP64 format may be
       used regardless of the size of a file."

(But I don't see a benefit.)

>>     # 4-byte sizes, not ZIP64
>>     arch --format=zip ...
>>
>>     # ZIP64, can use 8-byte sizes as needed
>>     arch --format=zip64 ...
>>
>> Makes sense?
> 
> Well, I would say that it would be a lot easier to always emit zip64 
> archives. An old-style unzipper should be able to read them anyway if 
> there are no overflowing fields, right? And, besides, who in 2017 has an 
> unzip tool that is unable to read zip64? Info-Zip UnZip has supported 
> Zip64 since 2009.

Windows XP.  Don't laugh. ;)

If you write zip64 extras for all size records then an old extractor
will only see the value 0xffffffff in them and ignore the zip64 part --
or ignore the entries outright.

Writing zip64 records only as needed saves space -- and that's what
zipping is all about, isn't it?

Adding unnecessary zip64 records would produce different ZIP files than
earlier version of git archive.  That's not a strong argument as changes
to libz can potentially do the same, but it still might affect someone
who caches generated ZIP files.

What I sent matches the behavior of InfoZIP zip (modulo bugs).  Why not
follow their lead?

(And one of the bugs in my patches not setting the version field to 45
as you pointed out earlier already.  InfoZIP may forget to do that if it
uses a zip64 extra for recording the offset, but it does set the version
correctly for files bigger than 4GB.)

What do other archivers do?

But I think a more important question is: Can the generated files
be extracted by popular tools (most importantly Windows' built-in
functionality, I guess)?

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-26 23:38                               ` René Scharfe
@ 2017-04-27  4:57                                 ` Peter Krefting
  2017-04-27 19:54                                   ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Krefting @ 2017-04-27  4:57 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

> Sizes can be stored in zip64 entries even if they are lower (from a 
> paragraph about the data descriptor):
>
> "4.3.9.2 When compressing files, compressed and uncompressed sizes
>      should be stored in ZIP64 format (as 8 byte values) when a
>      file's size exceeds 0xFFFFFFFF.   However ZIP64 format may be
>      used regardless of the size of a file."

That is only for the data descriptor. And this is the really confusing 
one as it only has the sizes, and do not have an indication anywhere 
of which format it is in. So they are either 32-bit or 64-bit, 
depending on whether it is a ZIP64 format archive, but it doesn't 
define how to tell.

For regular entries, the text is a bit clearer about the -1, though.

> Windows XP.  Don't laugh. ;)

You can always install 7-zip or something to extract on XP.

> What do other archivers do?

You should compare with what bsdtar (libarchive) does in zip64 mode. 
It also only ever does streaming mode (with data descriptors and 
such), and it does zip64.

> But I think a more important question is: Can the generated files be 
> extracted by popular tools (most importantly Windows' built-in 
> functionality, I guess)?

OK, so only enable zip64 mode if there are files >4G or the archive 
ends up being >4G. But the question is how we can tell, especially in 
streaming mode, and especially if data descriptors are magical...

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-27  4:57                                 ` Peter Krefting
@ 2017-04-27 19:54                                   ` René Scharfe
  2017-04-28  8:40                                     ` Peter Krefting
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-27 19:54 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Johannes Sixt, git, Keith Goldfarb

Am 27.04.2017 um 06:57 schrieb Peter Krefting:
> René Scharfe:
>> Windows XP.  Don't laugh. ;)
>
> You can always install 7-zip or something to extract on XP.

Sure, but if we were to start emitting zip64 records regardless of the
size of entries then we'd break compatibility.  We should have a very
good reason for doing that.  (I don't see the need so far.)

>> What do other archivers do?
> 
> You should compare with what bsdtar (libarchive) does in zip64 mode. It 
> also only ever does streaming mode (with data descriptors and such), and 
> it does zip64.

Good idea.  They do the same as InfoZIP (whose behavior I copied in
the series), i.e. emit zip64 records only for big files by default:

https://github.com/libarchive/libarchive/blob/master/libarchive/archive_write_set_format_zip.c#L722

They set the bar much higher for the uncompressed size of streamed
files; they emit zip64 extras only for sizes bigger than 0xff000000.

>> But I think a more important question is: Can the generated files be 
>> extracted by popular tools (most importantly Windows' built-in 
>> functionality, I guess)?
> 
> OK, so only enable zip64 mode if there are files >4G or the archive ends 
> up being >4G. But the question is how we can tell, especially in 
> streaming mode, and especially if data descriptors are magical...

The type of descriptor to use depends on the presence of 64-bit
sizes in a zip64 extra for that record.  For streaming compression
some kind of threshold lower than 0xffffffff needs to be set,
because deflate can increase the size of the result.

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 4/5] archive-zip: support archives bigger than 4GB
  2017-04-27 19:54                                   ` René Scharfe
@ 2017-04-28  8:40                                     ` Peter Krefting
  0 siblings, 0 replies; 44+ messages in thread
From: Peter Krefting @ 2017-04-28  8:40 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Sixt, git, Keith Goldfarb

René Scharfe:

> Sure, but if we were to start emitting zip64 records regardless of 
> the size of entries then we'd break compatibility.  We should have a 
> very good reason for doing that.  (I don't see the need so far.)

Sure, sounds good.

> The type of descriptor to use depends on the presence of 64-bit 
> sizes in a zip64 extra for that record.  For streaming compression 
> some kind of threshold lower than 0xffffffff needs to be set, 
> because deflate can increase the size of the result.

Indeed. And it seems that they use the version identifier (>= 45) to 
check whether it is in "zip64 format" or not. It seems a bit hit or 
miss to me, the best would be to always use the pre-amble descriptor, 
but that requires holding the entire compressed data in memory (or 
using temporary files or running two passes), neither which are very 
good ideas.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
                                     ` (4 preceding siblings ...)
  2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
@ 2017-04-29 21:00                   ` Torsten Bögershausen
  2017-04-29 22:28                     ` René Scharfe
  5 siblings, 1 reply; 44+ messages in thread
From: Torsten Bögershausen @ 2017-04-29 21:00 UTC (permalink / raw)
  To: René Scharfe, Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb

On 2017-04-24 19:22, René Scharfe wrote:
> The first patch adds (expensive) tests, the next two are cleanups which
> set the stage for the remaining two to actually implement zip64 support
> for offsets and file sizes.
> 
> Half of the series had been laying around for months, half-finished and
> forgotten because I got distracted by the holiday season. :-/
> 
>   archive-zip: add tests for big ZIP archives
>   archive-zip: use strbuf for ZIP directory
>   archive-zip: write ZIP dir entry directly to strbuf
>   archive-zip: support archives bigger than 4GB
>   archive-zip: support files bigger than 4GB
> 
>  archive-zip.c                   | 211 ++++++++++++++++++++++++----------------
>  t/t5004-archive-corner-cases.sh |  45 +++++++++
>  t/t5004/big-pack.zip            | Bin 0 -> 7373 bytes
>  3 files changed, 172 insertions(+), 84 deletions(-)
>  create mode 100644 t/t5004/big-pack.zip
> 
This fails here under Mac OS:
commit 4cdf3f9d84568da72f1dcade812de7a42ecb6d15
Author: René Scharfe <l.s.r@web.de>
Date:   Mon Apr 24 19:33:34 2017 +0200

    archive-zip: support files bigger than 4GB

---------------------------
Parts of t5004.log, hope this is helpful:

"$GIT_UNZIP" -t many-big.zip

Archive:  many-big.zip
warning [many-big.zip]:  577175 extra bytes at beginning or within zipfile
  (attempting to process anyway)
error [many-big.zip]:  start of central directory not found;
  zipfile corrupt.
  (please check that you have transferred or created the zipfile in the
  appropriate BINARY mode and that you have compiled UnZip properly)
not ok 12 - zip archive bigger than 4GB
#	
#		# build string containing 65536 characters


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-29 21:00                   ` [PATCH v3 0/5] archive-zip: support files and archives " Torsten Bögershausen
@ 2017-04-29 22:28                     ` René Scharfe
  2017-04-30  5:31                       ` Torsten Bögershausen
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-29 22:28 UTC (permalink / raw)
  To: Torsten Bögershausen, Peter Krefting, Johannes Sixt
  Cc: git, Keith Goldfarb

Am 29.04.2017 um 23:00 schrieb Torsten Bögershausen:
> This fails here under Mac OS:
> commit 4cdf3f9d84568da72f1dcade812de7a42ecb6d15
> Author: René Scharfe <l.s.r@web.de>
> Date:   Mon Apr 24 19:33:34 2017 +0200
> 
>      archive-zip: support files bigger than 4GB
> 
> ---------------------------
> Parts of t5004.log, hope this is helpful:
> 
> "$GIT_UNZIP" -t many-big.zip
> 
> Archive:  many-big.zip
> warning [many-big.zip]:  577175 extra bytes at beginning or within zipfile
>    (attempting to process anyway)
> error [many-big.zip]:  start of central directory not found;
>    zipfile corrupt.
>    (please check that you have transferred or created the zipfile in the
>    appropriate BINARY mode and that you have compiled UnZip properly)
> not ok 12 - zip archive bigger than 4GB
> #	
> #		# build string containing 65536 characters

Which version of unzip do you have (unzip -v, look for ZIP64_SUPPORT)?
It seems that (some version of?) OS X ships with an older unzip which
can't handle big files:

   https://superuser.com/questions/114011/extract-large-zip-file-50-gb-on-mac-os-x

Is the following check (zip archive with files bigger than 4GB) skipped,
e.g. because ZIPINFO is missing?  Otherwise I would expect it to fail as
well.

René


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-29 22:28                     ` René Scharfe
@ 2017-04-30  5:31                       ` Torsten Bögershausen
  2017-04-30  7:53                         ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Torsten Bögershausen @ 2017-04-30  5:31 UTC (permalink / raw)
  To: René Scharfe, Peter Krefting, Johannes Sixt; +Cc: git, Keith Goldfarb



On 30/04/17 00:28, René Scharfe wrote:
> Am 29.04.2017 um 23:00 schrieb Torsten Bögershausen:
>> This fails here under Mac OS:
>> commit 4cdf3f9d84568da72f1dcade812de7a42ecb6d15
>> Author: René Scharfe <l.s.r@web.de>
>> Date:   Mon Apr 24 19:33:34 2017 +0200
>>
>>      archive-zip: support files bigger than 4GB
>>
>> ---------------------------
>> Parts of t5004.log, hope this is helpful:
>>
>> "$GIT_UNZIP" -t many-big.zip
>>
>> Archive:  many-big.zip
>> warning [many-big.zip]:  577175 extra bytes at beginning or within zipfile
>>    (attempting to process anyway)
>> error [many-big.zip]:  start of central directory not found;
>>    zipfile corrupt.
>>    (please check that you have transferred or created the zipfile in the
>>    appropriate BINARY mode and that you have compiled UnZip properly)
>> not ok 12 - zip archive bigger than 4GB
>> #	
>> #		# build string containing 65536 characters
>
> Which version of unzip do you have (unzip -v, look for ZIP64_SUPPORT)?
> It seems that (some version of?) OS X ships with an older unzip which
> can't handle big files:
>
>    https://superuser.com/questions/114011/extract-large-zip-file-50-gb-on-mac-os-x
>
> Is the following check (zip archive with files bigger than 4GB) skipped,
> e.g. because ZIPINFO is missing?  Otherwise I would expect it to fail as
> well.
>
> René
>

Sorry, I was not looking careful enough, the macro `$GIT_UNZIP`
gave the impression that an unzip provided by Git (or the Git test framework) 
was used :-(

$ which unzip
/usr/bin/unzip

$ unzip -v
UnZip 5.52 of 28 February 2005, by Info-ZIP.  Maintained by C. Spieler.  Send
bug reports using http://www.info-zip.org/zip-bug.html; see README for details.

Latest sources and executables are at ftp://ftp.info-zip.org/pub/infozip/ ;
see ftp://ftp.info-zip.org/pub/infozip/UnZip.html for other sites.

Compiled with gcc 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.1) for Unix 
on Aug  1 2015.

UnZip special compilation options:
         COPYRIGHT_CLEAN (PKZIP 0.9x unreducing method not supported)
         SET_DIR_ATTRIB
         TIMESTAMP
         USE_EF_UT_TIME
         USE_UNSHRINK (PKZIP/Zip 1.x unshrinking method supported)
         USE_DEFLATE64 (PKZIP 4.x Deflate64(tm) supported)
         VMS_TEXT_CONV
         [decryption, version 2.9 of 05 May 2000]

UnZip and ZipInfo environment options:
            UNZIP:  [none]
         UNZIPOPT:  [none]
          ZIPINFO:  [none]
       ZIPINFOOPT:  [none]

-------------------
And here is the longer log:
not ok 12 - zip archive bigger than 4GB
#
#               # build string containing 65536 characters
# 
s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef &&
# 
s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
# 
s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
#
#               # create blob with a length of 65536 + 1 bytes
#               blob=$(echo $s | git hash-object -w --stdin) &&
#
#               # create tree containing 65500 entries of that blob
#               for i in $(test_seq 1 65500)
#               do
#                       echo "100644 blob $blob $i"
#               done >tree &&
#               tree=$(git mktree <tree) &&
#
#               # zip it, creating an archive a bit bigger than 4GB
#               git archive -0 -o many-big.zip $tree &&
#
#               "$GIT_UNZIP" -t many-big.zip 9999 65500 &&
#               "$GIT_UNZIP" -t many-big.zip
#

skipping test: zip archive with files bigger than 4GB
         # Pack created with:
         #   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
         mkdir -p .git/objects/pack &&
         (
                 cd .git/objects/pack &&
                 "$GIT_UNZIP" "$TEST_DIRECTORY"/t5004/big-pack.zip
         ) &&
         blob=754a93d6fada4c6873360e6cb4b209132271ab0e &&
         size=$(expr 4100 "*" 1024 "*" 1024) &&

         # create a tree containing the file
         tree=$(echo "100644 blob $blob  big-file" | git mktree) &&

         # zip it, creating an archive with a file bigger than 4GB
         git archive -o big.zip $tree &&

         "$GIT_UNZIP" -t big.zip &&
         "$ZIPINFO" big.zip >big.lst &&
         grep $size big.lst

ok 13 # skip zip archive with files bigger than 4GB (missing ZIPINFO of 
EXPENSIVE,UNZIP,ZIPINFO)

# failed 1 among 13 test(s)
1..13



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30  5:31                       ` Torsten Bögershausen
@ 2017-04-30  7:53                         ` René Scharfe
  2017-04-30 13:06                           ` Torsten Bögershausen
  2017-04-30 16:32                           ` Johannes Sixt
  0 siblings, 2 replies; 44+ messages in thread
From: René Scharfe @ 2017-04-30  7:53 UTC (permalink / raw)
  To: Torsten Bögershausen, Peter Krefting, Johannes Sixt, Junio C Hamano
  Cc: git, Keith Goldfarb

Am 30.04.2017 um 07:31 schrieb Torsten Bögershausen:
> Sorry, I was not looking careful enough, the macro `$GIT_UNZIP`
> gave the impression that an unzip provided by Git (or the Git test 
> framework) was used :-(
> 
> $ which unzip
> /usr/bin/unzip
> 
> $ unzip -v
> UnZip 5.52 of 28 February 2005, by Info-ZIP.  Maintained by C. Spieler.  
> Send
> bug reports using http://www.info-zip.org/zip-bug.html; see README for 
> details.
> 
> Latest sources and executables are at ftp://ftp.info-zip.org/pub/infozip/ ;
> see ftp://ftp.info-zip.org/pub/infozip/UnZip.html for other sites.
> 
> Compiled with gcc 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.1) 
> for Unix on Aug  1 2015.
> 
> UnZip special compilation options:
>          COPYRIGHT_CLEAN (PKZIP 0.9x unreducing method not supported)
>          SET_DIR_ATTRIB
>          TIMESTAMP
>          USE_EF_UT_TIME
>          USE_UNSHRINK (PKZIP/Zip 1.x unshrinking method supported)
>          USE_DEFLATE64 (PKZIP 4.x Deflate64(tm) supported)
>          VMS_TEXT_CONV
>          [decryption, version 2.9 of 05 May 2000]
> 
> UnZip and ZipInfo environment options:
>             UNZIP:  [none]
>          UNZIPOPT:  [none]
>           ZIPINFO:  [none]
>        ZIPINFOOPT:  [none]

OK, so they indeed still ship the old version of unzip that doesn't
support big files.

> ok 13 # skip zip archive with files bigger than 4GB (missing ZIPINFO of 
> EXPENSIVE,UNZIP,ZIPINFO)

And if you had zipinfo then this test would certainly fail.

Anyway, thanks for running these expensive tests!  You could retry
with unzip version 6.00 and its zipinfo if you want.  But we
certainly need the following patch:

-- >8 --
Subject: [PATCH] t5004: require 64-bit support for big ZIP tests

Check if unzip supports the ZIP64 format and skip the tests that create
big archives otherwise.  Also skip the test that archives a big file on
32-bit platforms because the git object systems can't unpack files
bigger than 4GB there.

Reported-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 t/t5004-archive-corner-cases.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index 9106c53c4c..fd23c75f59 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -27,6 +27,9 @@ check_dir() {
 	test_cmp expect actual
 }
 
+test_lazy_prereq UNZIP_ZIP64_SUPPORT '
+	"$GIT_UNZIP" -v | grep ZIP64_SUPPORT
+'
 
 # bsdtar/libarchive versions before 3.1.3 consider a tar file with a
 # global pax header that is not followed by a file record as corrupt.
@@ -155,7 +158,8 @@ test_expect_success ZIPINFO 'zip archive with many entries' '
 	test_cmp expect actual
 '
 
-test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
+test_expect_success EXPENSIVE,UNZIP,UNZIP_ZIP64_SUPPORT \
+	'zip archive bigger than 4GB' '
 	# build string containing 65536 characters
 	s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef &&
 	s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s &&
@@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
 	"$GIT_UNZIP" -t many-big.zip
 '
 
-test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '
+test_expect_success EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \
+	'zip archive with files bigger than 4GB' '
 	# Pack created with:
 	#   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
 	mkdir -p .git/objects/pack &&
-- 
2.12.2

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30  7:53                         ` René Scharfe
@ 2017-04-30 13:06                           ` Torsten Bögershausen
  2017-04-30 16:32                           ` Johannes Sixt
  1 sibling, 0 replies; 44+ messages in thread
From: Torsten Bögershausen @ 2017-04-30 13:06 UTC (permalink / raw)
  To: René Scharfe, Peter Krefting, Johannes Sixt, Junio C Hamano
  Cc: git, Keith Goldfarb

On 2017-04-30 09:53, René Scharfe wrote:
> Am 30.04.2017 um 07:31 schrieb Torsten Bögershausen:
>> Sorry, I was not looking careful enough, the macro `$GIT_UNZIP`
>> gave the impression that an unzip provided by Git (or the Git test 
>> framework) was used :-(
>>
>> $ which unzip
>> /usr/bin/unzip
>>
>> $ unzip -v
>> UnZip 5.52 of 28 February 2005, by Info-ZIP.  Maintained by C. Spieler.  
>> Send
>> bug reports using http://www.info-zip.org/zip-bug.html; see README for 
>> details.
>>
>> Latest sources and executables are at ftp://ftp.info-zip.org/pub/infozip/ ;
>> see ftp://ftp.info-zip.org/pub/infozip/UnZip.html for other sites.
>>
>> Compiled with gcc 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.1) 
>> for Unix on Aug  1 2015.
>>
>> UnZip special compilation options:
>>          COPYRIGHT_CLEAN (PKZIP 0.9x unreducing method not supported)
>>          SET_DIR_ATTRIB
>>          TIMESTAMP
>>          USE_EF_UT_TIME
>>          USE_UNSHRINK (PKZIP/Zip 1.x unshrinking method supported)
>>          USE_DEFLATE64 (PKZIP 4.x Deflate64(tm) supported)
>>          VMS_TEXT_CONV
>>          [decryption, version 2.9 of 05 May 2000]
>>
>> UnZip and ZipInfo environment options:
>>             UNZIP:  [none]
>>          UNZIPOPT:  [none]
>>           ZIPINFO:  [none]
>>        ZIPINFOOPT:  [none]
> 
> OK, so they indeed still ship the old version of unzip that doesn't
> support big files.
> 
>> ok 13 # skip zip archive with files bigger than 4GB (missing ZIPINFO of 
>> EXPENSIVE,UNZIP,ZIPINFO)
> 
> And if you had zipinfo then this test would certainly fail.
> 

After installing unzip via mac ports, I have:

$ which zipinfo
/opt/local/bin/zipinfo

$ which unzip
 /opt/local/bin/unzip

$ unzip -v
UnZip 6.00 of 20 April 2009, by Info-ZIP.  Maintained by C. Spieler.  Send
bug reports using http://www.info-zip.org/zip-bug.html; see README for details.

Latest sources and executables are at ftp://ftp.info-zip.org/pub/infozip/ ;
see ftp://ftp.info-zip.org/pub/infozip/UnZip.html for other sites.

Compiled with gcc 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81) for Unix
Mac OS X on Jan 31 2016.

UnZip special compilation options:
        COPYRIGHT_CLEAN (PKZIP 0.9x unreducing method not supported)
        SET_DIR_ATTRIB
        SYMLINKS (symbolic links supported, if RTL and file system permit)
        TIMESTAMP
        UNIXBACKUP
        USE_EF_UT_TIME
        USE_UNSHRINK (PKZIP/Zip 1.x unshrinking method supported)
        USE_DEFLATE64 (PKZIP 4.x Deflate64(tm) supported)
        VMS_TEXT_CONV
        [decryption, version 2.11 of 05 Jan 2007]

UnZip and ZipInfo environment options:
           UNZIP:  [none]
        UNZIPOPT:  [none]
         ZIPINFO:  [none]
      ZIPINFOOPT:  [none]


> Anyway, thanks for running these expensive tests!  You could retry
> with unzip version 6.00 and its zipinfo if you want.  But we
> certainly need the following patch:
> 

The 6.0 is not compiled with ZIP64_SUPPORT....
After manually doing a copy-paste of your patch from below, tests are skipped:

ok 11 - zip archive with many entries
ok 12 # skip zip archive bigger than 4GB (missing UNZIP_ZIP64_SUPPORT of
EXPENSIVE,UNZIP,UNZIP_ZIP64_SUPPORT)
ok 13 # skip zip archive with files bigger than 4GB (missing UNZIP_ZIP64_SUPPORT
of EXPENSIVE,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO)

[patch snipped]


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30  7:53                         ` René Scharfe
  2017-04-30 13:06                           ` Torsten Bögershausen
@ 2017-04-30 16:32                           ` Johannes Sixt
  2017-04-30 16:40                             ` René Scharfe
  1 sibling, 1 reply; 44+ messages in thread
From: Johannes Sixt @ 2017-04-30 16:32 UTC (permalink / raw)
  To: René Scharfe
  Cc: Torsten Bögershausen, Peter Krefting, Junio C Hamano, git,
	Keith Goldfarb

Am 30.04.2017 um 09:53 schrieb René Scharfe:
> @@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' '
>  	"$GIT_UNZIP" -t many-big.zip
>  '
>
> -test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' '
> +test_expect_success EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \

Why is LONG_IS_64BIT required?

> +	'zip archive with files bigger than 4GB' '
>  	# Pack created with:
>  	#   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file
>  	mkdir -p .git/objects/pack &&
>

-- Hannes


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30 16:32                           ` Johannes Sixt
@ 2017-04-30 16:40                             ` René Scharfe
  2017-04-30 23:49                               ` Junio C Hamano
  0 siblings, 1 reply; 44+ messages in thread
From: René Scharfe @ 2017-04-30 16:40 UTC (permalink / raw)
  To: Johannes Sixt
  Cc: Torsten Bögershausen, Peter Krefting, Junio C Hamano, git,
	Keith Goldfarb

Am 30.04.2017 um 18:32 schrieb Johannes Sixt:
> Am 30.04.2017 um 09:53 schrieb René Scharfe:
>> @@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive 
>> bigger than 4GB' '
>>      "$GIT_UNZIP" -t many-big.zip
>>  '
>>
>> -test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files 
>> bigger than 4GB' '
>> +test_expect_success 
>> EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \
> 
> Why is LONG_IS_64BIT required?

Blob sizes are kept in variables of type unsigned long.  64 bits are
required to store file sizes bigger than 4GB, and this test is about
such a file.  A 32-bit git can't use the pack we supply the test file
in, so we have to skip this test.

>> +    'zip archive with files bigger than 4GB' '
>>      # Pack created with:
>>      #   dd if=/dev/zero of=file bs=1M count=4100 && git hash-object 
>> -w file
>>      mkdir -p .git/objects/pack &&
>>
> 
> -- Hannes
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30 16:40                             ` René Scharfe
@ 2017-04-30 23:49                               ` Junio C Hamano
  2017-05-01  8:30                                 ` René Scharfe
  0 siblings, 1 reply; 44+ messages in thread
From: Junio C Hamano @ 2017-04-30 23:49 UTC (permalink / raw)
  To: René Scharfe
  Cc: Johannes Sixt, Torsten Bögershausen, Peter Krefting, git,
	Keith Goldfarb

René Scharfe <l.s.r@web.de> writes:

> Am 30.04.2017 um 18:32 schrieb Johannes Sixt:
>> Am 30.04.2017 um 09:53 schrieb René Scharfe:
>>> @@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip
>>> archive bigger than 4GB' '
>>>      "$GIT_UNZIP" -t many-big.zip
>>>  '
>>>
>>> -test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with
>>> files bigger than 4GB' '
>>> +test_expect_success
>>> EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \
>>
>> Why is LONG_IS_64BIT required?
>
> Blob sizes are kept in variables of type unsigned long.  64 bits are
> required to store file sizes bigger than 4GB, and this test is about
> such a file.  A 32-bit git can't use the pack we supply the test file
> in, so we have to skip this test.
>
>>> +    'zip archive with files bigger than 4GB' '
>>>      # Pack created with:
>>>      #   dd if=/dev/zero of=file bs=1M count=4100 && git
>>> hash-object -w file
>>>      mkdir -p .git/objects/pack &&

OK, so let's queue this on top and have it in 'next' to unblock
users of older unzip and unzip compiled wihtout zip64 support?

Thanks.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB
  2017-04-30 23:49                               ` Junio C Hamano
@ 2017-05-01  8:30                                 ` René Scharfe
  0 siblings, 0 replies; 44+ messages in thread
From: René Scharfe @ 2017-05-01  8:30 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Johannes Sixt, Torsten Bögershausen, Peter Krefting, git,
	Keith Goldfarb

Am 01.05.2017 um 01:49 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> Am 30.04.2017 um 18:32 schrieb Johannes Sixt:
>>> Am 30.04.2017 um 09:53 schrieb René Scharfe:
>>>> @@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip
>>>> archive bigger than 4GB' '
>>>>       "$GIT_UNZIP" -t many-big.zip
>>>>   '
>>>>
>>>> -test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with
>>>> files bigger than 4GB' '
>>>> +test_expect_success
>>>> EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \
>>>
>>> Why is LONG_IS_64BIT required?
>>
>> Blob sizes are kept in variables of type unsigned long.  64 bits are
>> required to store file sizes bigger than 4GB, and this test is about
>> such a file.  A 32-bit git can't use the pack we supply the test file
>> in, so we have to skip this test.
>>
>>>> +    'zip archive with files bigger than 4GB' '
>>>>       # Pack created with:
>>>>       #   dd if=/dev/zero of=file bs=1M count=4100 && git
>>>> hash-object -w file
>>>>       mkdir -p .git/objects/pack &&
> 
> OK, so let's queue this on top and have it in 'next' to unblock
> users of older unzip and unzip compiled wihtout zip64 support?

Yes, please; it allows them to run t5004 with --long-tests or 
GIT_TEST_LONG set (but not to actually test zip64 functionality).

René

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2017-05-01  8:30 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-21 21:08 Git archive doesn't fully support zip64 Keith Goldfarb
2017-04-22 19:22 ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits Peter Krefting
2017-04-22 21:52   ` Johannes Sixt
2017-04-22 22:41     ` [PATCH v2] " Peter Krefting
2017-04-23  7:50       ` Johannes Sixt
2017-04-23 14:51         ` Peter Krefting
2017-04-23 19:49           ` Johannes Sixt
2017-04-24  8:04             ` Peter Krefting
2017-04-24 12:04               ` René Scharfe
2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
2017-04-24 17:29                   ` [PATCH v3 1/5] archive-zip: add tests for big ZIP archives René Scharfe
2017-04-24 17:30                   ` [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory René Scharfe
2017-04-25  4:51                     ` Junio C Hamano
2017-04-25  5:28                       ` René Scharfe
2017-04-24 17:31                   ` [PATCH v3 3/5] archive-zip: write ZIP dir entry directly to strbuf René Scharfe
2017-04-24 17:32                   ` [PATCH v3 4/5] archive-zip: support archives bigger than 4GB René Scharfe
2017-04-24 18:24                     ` Peter Krefting
2017-04-24 20:06                       ` René Scharfe
2017-04-24 20:39                         ` René Scharfe
2017-04-24 21:02                         ` Johannes Sixt
2017-04-24 21:41                           ` René Scharfe
2017-04-25  7:55                         ` Peter Krefting
2017-04-25 16:24                           ` René Scharfe
2017-04-26 21:02                             ` Peter Krefting
2017-04-26 23:38                               ` René Scharfe
2017-04-27  4:57                                 ` Peter Krefting
2017-04-27 19:54                                   ` René Scharfe
2017-04-28  8:40                                     ` Peter Krefting
2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
2017-04-24 21:11                     ` Keith Goldfarb
2017-04-25  4:46                     ` Junio C Hamano
2017-04-25  5:27                       ` René Scharfe
2017-04-29 21:00                   ` [PATCH v3 0/5] archive-zip: support files and archives " Torsten Bögershausen
2017-04-29 22:28                     ` René Scharfe
2017-04-30  5:31                       ` Torsten Bögershausen
2017-04-30  7:53                         ` René Scharfe
2017-04-30 13:06                           ` Torsten Bögershausen
2017-04-30 16:32                           ` Johannes Sixt
2017-04-30 16:40                             ` René Scharfe
2017-04-30 23:49                               ` Junio C Hamano
2017-05-01  8:30                                 ` René Scharfe
2017-04-23  0:16     ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits René Scharfe
2017-04-23  6:42       ` Peter Krefting
2017-04-23  7:27         ` Johannes Sixt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).