All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Krefting <peter@softwolves.pp.se>
To: git@vger.kernel.org
Cc: Keith Goldfarb <keith@blackthorn-media.com>
Subject: [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits
Date: Sat, 22 Apr 2017 20:22:37 +0100 (CET)	[thread overview]
Message-ID: <alpine.DEB.2.11.1704222019420.12779@perkele.intern.softwolves.pp.se> (raw)
In-Reply-To: <3C736801-6BB8-41CC-88FF-C42FC853A736@blackthorn-media.com>

If the size of a file in the archive, or its offset within the zip file
itself, cannot be expressed in 32 bits, add a zip64 local header with the
actual values. If we do find such entries, we also set a flag to force the
creation of a zip64 end of central directory record.

Signed-off-by: Peter Krefting <peter@softwolves.pp.se>
---
  archive-zip.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
  1 file changed, 41 insertions(+), 9 deletions(-)

> git archive, when writing a zip file, has a silent 4GB file size 
> limit (on the inputs as well as the output), as it doesn’t fully 
> support zip64.

Yeah, it seems that the zip64 support that was added was meant to support 
more than 65535 files, but it did not add support for 64-bit file 
sizes or for ZIP archives larger than 4 Gbyte.

Try the patch below; it seems to work for me with a repository with 
two files of 4 Gbyte plus a few bytes. I haven't tested the case where 
the archive itself is larger than 4 Gbyte, but that ought to work, too.

diff --git a/archive-zip.c b/archive-zip.c
index b429a8d..c76a9b4 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,6 +10,7 @@

  static int zip_date;
  static int zip_time;
+static int zip_zip64;

  static unsigned char *zip_dir;
  static unsigned int zip_dir_size;
@@ -88,6 +89,16 @@ struct zip_extra_mtime {
  	unsigned char _end[1];
  };

+struct zip_extra_zip64 {	/* extra-field record written after the mtime extra field when a value was clamped */
+	unsigned char magic[2];	/* header ID; written as 0x0001 */
+	unsigned char extra_size[2];	/* written as ZIP_EXTRA_ZIP64_PAYLOAD_SIZE */
+	unsigned char size[8];	/* receives size as a 64-bit little-endian value */
+	unsigned char compressed_size[8];	/* receives compressed_size as a 64-bit little-endian value */
+	unsigned char offset[8];	/* receives zip_offset as a 64-bit little-endian value */
+	unsigned char disk[4];	/* written as 0 */
+	unsigned char _end[1];	/* marker only: offsetof(..., _end) gives ZIP_EXTRA_ZIP64_SIZE */
+};
+
  struct zip64_dir_trailer {
  	unsigned char magic[4];
  	unsigned char record_size[8];
@@ -122,6 +133,9 @@ struct zip64_dir_trailer_locator {
  #define ZIP_EXTRA_MTIME_SIZE	offsetof(struct zip_extra_mtime, _end)
  #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \
  	(ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags))
+#define ZIP_EXTRA_ZIP64_SIZE	offsetof(struct zip_extra_zip64, _end)
+#define ZIP_EXTRA_ZIP64_PAYLOAD_SIZE \
+	(ZIP_EXTRA_ZIP64_SIZE - offsetof(struct zip_extra_zip64, size))
  #define ZIP64_DIR_TRAILER_SIZE	offsetof(struct zip64_dir_trailer, _end)
  #define ZIP64_DIR_TRAILER_RECORD_SIZE \
  	(ZIP64_DIR_TRAILER_SIZE - \
@@ -219,19 +233,25 @@ static void set_zip_dir_data_desc(struct zip_dir_header *header,
  				  unsigned long compressed_size,
  				  unsigned long crc)
  {
+	int clamped = 0;
  	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, &clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, &clamped));
+	if (clamped)
+		zip_zip64 = 1;
  }

  static void set_zip_header_data_desc(struct zip_local_header *header,
  				     unsigned long size,
  				     unsigned long compressed_size,
-				     unsigned long crc)
+				     unsigned long crc,
+				     int *clamped)	/* in/out flag shared with the caller; also handed to clamp_max() */
  {
  	copy_le32(header->crc32, crc);
-	copy_le32(header->compressed_size, compressed_size);
-	copy_le32(header->size, size);
+	copy_le32(header->compressed_size, clamp_max(compressed_size, 0xFFFFFFFFU, clamped));
+	copy_le32(header->size, clamp_max(size, 0xFFFFFFFFU, clamped));
+	if (*clamped)	/* test the flag, not the pointer: the caller passes &clamped, so the pointer is always non-NULL */
+		zip_zip64 = 1;
  }

  static int has_only_ascii(const char *s)
@@ -279,6 +299,7 @@ static int write_zip_entry(struct archiver_args *args,
  	int is_binary = -1;
  	const char *path_without_prefix = path + args->baselen;
  	unsigned int creator_version = 0;
+	int clamped = 0;

  	crc = crc32(0, NULL, 0);

@@ -376,7 +397,7 @@ static int write_zip_entry(struct archiver_args *args,
  	copy_le16(dirent.comment_length, 0);
  	copy_le16(dirent.disk, 0);
  	copy_le32(dirent.attr2, attr2);
-	copy_le32(dirent.offset, zip_offset);
+	copy_le32(dirent.offset, clamp_max(zip_offset, 0xFFFFFFFFU, &clamped));

  	copy_le32(header.magic, 0x04034b50);
  	copy_le16(header.version, 10);
@@ -384,15 +405,26 @@ static int write_zip_entry(struct archiver_args *args,
  	copy_le16(header.compression_method, method);
  	copy_le16(header.mtime, zip_time);
  	copy_le16(header.mdate, zip_date);
-	set_zip_header_data_desc(&header, size, compressed_size, crc);
+	set_zip_header_data_desc(&header, size, compressed_size, crc, &clamped);
  	copy_le16(header.filename_length, pathlen);
-	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE);
+	copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE + (clamped ? ZIP_EXTRA_ZIP64_SIZE : 0));
  	write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
  	zip_offset += ZIP_LOCAL_HEADER_SIZE;
  	write_or_die(1, path, pathlen);
  	zip_offset += pathlen;
  	write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE);
  	zip_offset += ZIP_EXTRA_MTIME_SIZE;
+	if (clamped) {
+		struct zip_extra_zip64 extra_zip64;
+		copy_le16(extra_zip64.magic, 0x0001);
+		copy_le16(extra_zip64.extra_size, ZIP_EXTRA_ZIP64_PAYLOAD_SIZE);
+		copy_le64(extra_zip64.size, size);
+		copy_le64(extra_zip64.compressed_size, compressed_size);
+		copy_le64(extra_zip64.offset, zip_offset);
+		copy_le32(extra_zip64.disk, 0);
+		write_or_die(1, &extra_zip64, ZIP_EXTRA_ZIP64_SIZE);
+		zip_offset += ZIP_EXTRA_ZIP64_SIZE;
+	}
  	if (stream && method == 0) {
  		unsigned char buf[STREAM_BUFFER_SIZE];
  		ssize_t readlen;
@@ -538,7 +570,7 @@ static void write_zip_trailer(const unsigned char *sha1)
  	copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0);

  	write_or_die(1, zip_dir, zip_dir_offset);
-	if (clamped)
+	if (clamped || zip_zip64)
  		write_zip64_trailer();
  	write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
  	if (sha1)
-- 
2.1.4

  reply	other threads:[~2017-04-22 19:32 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-21 21:08 Git archive doesn't fully support zip64 Keith Goldfarb
2017-04-22 19:22 ` Peter Krefting [this message]
2017-04-22 21:52   ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits Johannes Sixt
2017-04-22 22:41     ` [PATCH v2] " Peter Krefting
2017-04-23  7:50       ` Johannes Sixt
2017-04-23 14:51         ` Peter Krefting
2017-04-23 19:49           ` Johannes Sixt
2017-04-24  8:04             ` Peter Krefting
2017-04-24 12:04               ` René Scharfe
2017-04-24 17:22                 ` [PATCH v3 0/5] archive-zip: support files and archives bigger than 4GB René Scharfe
2017-04-24 17:29                   ` [PATCH v3 1/5] archive-zip: add tests for big ZIP archives René Scharfe
2017-04-24 17:30                   ` [PATCH v3 2/5] archive-zip: use strbuf for ZIP directory René Scharfe
2017-04-25  4:51                     ` Junio C Hamano
2017-04-25  5:28                       ` René Scharfe
2017-04-24 17:31                   ` [PATCH v3 3/5] archive-zip: write ZIP dir entry directly to strbuf René Scharfe
2017-04-24 17:32                   ` [PATCH v3 4/5] archive-zip: support archives bigger than 4GB René Scharfe
2017-04-24 18:24                     ` Peter Krefting
2017-04-24 20:06                       ` René Scharfe
2017-04-24 20:39                         ` René Scharfe
2017-04-24 21:02                         ` Johannes Sixt
2017-04-24 21:41                           ` René Scharfe
2017-04-25  7:55                         ` Peter Krefting
2017-04-25 16:24                           ` René Scharfe
2017-04-26 21:02                             ` Peter Krefting
2017-04-26 23:38                               ` René Scharfe
2017-04-27  4:57                                 ` Peter Krefting
2017-04-27 19:54                                   ` René Scharfe
2017-04-28  8:40                                     ` Peter Krefting
2017-04-24 17:33                   ` [PATCH v3 5/5] archive-zip: support files " René Scharfe
2017-04-24 21:11                     ` Keith Goldfarb
2017-04-25  4:46                     ` Junio C Hamano
2017-04-25  5:27                       ` René Scharfe
2017-04-29 21:00                   ` [PATCH v3 0/5] archive-zip: support files and archives " Torsten Bögershausen
2017-04-29 22:28                     ` René Scharfe
2017-04-30  5:31                       ` Torsten Bögershausen
2017-04-30  7:53                         ` René Scharfe
2017-04-30 13:06                           ` Torsten Bögershausen
2017-04-30 16:32                           ` Johannes Sixt
2017-04-30 16:40                             ` René Scharfe
2017-04-30 23:49                               ` Junio C Hamano
2017-05-01  8:30                                 ` René Scharfe
2017-04-23  0:16     ` [PATCH] archive-zip: Add zip64 headers when file size is too large for 32 bits René Scharfe
2017-04-23  6:42       ` Peter Krefting
2017-04-23  7:27         ` Johannes Sixt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.2.11.1704222019420.12779@perkele.intern.softwolves.pp.se \
    --to=peter@softwolves.pp.se \
    --cc=git@vger.kernel.org \
    --cc=keith@blackthorn-media.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.