git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Johannes Schindelin <Johannes.Schindelin@gmx.de>,
	Jeff King <peff@peff.net>
Cc: Rohit Ashiwal via GitGitGadget <gitgitgadget@gmail.com>,
	git@vger.kernel.org, Junio C Hamano <gitster@pobox.com>,
	Rohit Ashiwal <rohit.ashiwal265@gmail.com>
Subject: Re: [PATCH 2/2] archive: avoid spawning `gzip`
Date: Sat, 27 Apr 2019 19:39:27 +0200	[thread overview]
Message-ID: <45afd432-9e45-ea76-aa1b-e8cd1264e3a0@web.de> (raw)
In-Reply-To: <f6f32bc0-109c-e0eb-f7d2-9e46647f260c@web.de>

Am 27.04.19 um 11:59 schrieb René Scharfe:
> Am 26.04.19 um 16:51 schrieb Johannes Schindelin:
>>
>> On Mon, 15 Apr 2019, Jeff King wrote:
>>
>>> On Sun, Apr 14, 2019 at 12:01:10AM +0200, René Scharfe wrote:
>>>
>>>> Doing compression in its own thread may be a good idea.
>>>
>>> Yeah. It might even make the patch simpler, since I'd expect it to be
>>> implemented with start_async() and a descriptor, making it look just
>>> like a gzip pipe to the caller. :)
>>
>> Sadly, it does not really look like it is simpler.
>
> I have to agree -- at least I was unable to pull off the stdout
> plumbing trick.

The simplest solution is of course to not touch the archive code.  The
patch below makes that possible:

Benchmark #1: ~/src/git/git -c tar.tgz.command=~/src/git/git-gzip archive --format=tgz HEAD >/dev/null
  Time (mean ± σ):     17.256 s ±  0.299 s    [User: 20.380 s, System: 0.294 s]
  Range (min … max):   16.940 s … 17.804 s    10 runs

Curious to see what it looks like on other systems and platforms.

And perhaps the buffer size needs to be tuned.

-- >8 --
Subject: [PATCH] add git gzip

Add a cheap gzip lookalike based on zlib for systems that don't have
(or want) the real thing.  It can be used e.g. to generate tgz files
using git archive and its configuration options tar.tgz.command and
tar.tar.gz.command, without any other external dependency.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 .gitignore       |  1 +
 Makefile         |  1 +
 builtin.h        |  1 +
 builtin/gzip.c   | 64 ++++++++++++++++++++++++++++++++++++++++++++++++
 command-list.txt |  1 +
 git.c            |  1 +
 6 files changed, 69 insertions(+)
 create mode 100644 builtin/gzip.c

diff --git a/.gitignore b/.gitignore
index 44c74402c8..e550868219 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@
 /git-gc
 /git-get-tar-commit-id
 /git-grep
+/git-gzip
 /git-hash-object
 /git-help
 /git-http-backend
diff --git a/Makefile b/Makefile
index 9f1b6e8926..2b34f1a4aa 100644
--- a/Makefile
+++ b/Makefile
@@ -1075,6 +1075,7 @@ BUILTIN_OBJS += builtin/fsck.o
 BUILTIN_OBJS += builtin/gc.o
 BUILTIN_OBJS += builtin/get-tar-commit-id.o
 BUILTIN_OBJS += builtin/grep.o
+BUILTIN_OBJS += builtin/gzip.o
 BUILTIN_OBJS += builtin/hash-object.o
 BUILTIN_OBJS += builtin/help.o
 BUILTIN_OBJS += builtin/index-pack.o
diff --git a/builtin.h b/builtin.h
index b78ab6e30b..abc34cc9d0 100644
--- a/builtin.h
+++ b/builtin.h
@@ -170,6 +170,7 @@ extern int cmd_fsck(int argc, const char **argv, const char *prefix);
 extern int cmd_gc(int argc, const char **argv, const char *prefix);
 extern int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix);
 extern int cmd_grep(int argc, const char **argv, const char *prefix);
+extern int cmd_gzip(int argc, const char **argv, const char *prefix);
 extern int cmd_hash_object(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_index_pack(int argc, const char **argv, const char *prefix);
diff --git a/builtin/gzip.c b/builtin/gzip.c
new file mode 100644
index 0000000000..90a98c44ce
--- /dev/null
+++ b/builtin/gzip.c
@@ -0,0 +1,64 @@
+#include "cache.h"
+#include "builtin.h"
+#include "parse-options.h"
+
+static const char * const gzip_usage[] = {
+	N_("git gzip [-NUM]"),
+	NULL
+};
+
+static int level_callback(const struct option *opt, const char *arg, int unset)
+{
+	int *levelp = opt->value;
+	int value;
+	const char *endp;
+
+	if (unset)
+		BUG("switch -NUM cannot be negated");
+
+	value = strtol(arg, (char **)&endp, 10);
+	if (*endp)
+		BUG("switch -NUM cannot be non-numeric");
+
+	*levelp = value;
+	return 0;
+}
+
+#define BUFFERSIZE (64 * 1024)
+
+int cmd_gzip(int argc, const char **argv, const char *prefix)
+{
+	gzFile gz;
+	int level = Z_DEFAULT_COMPRESSION;
+	struct option options[] = {
+		OPT_NUMBER_CALLBACK(&level, N_("compression level"),
+				    level_callback),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, prefix, options, gzip_usage, 0);
+	if (argc > 0)
+		usage_with_options(gzip_usage, options);
+
+	gz = gzdopen(1, "wb");
+	if (!gz)
+		die(_("unable to gzdopen stdout"));
+
+	if (gzsetparams(gz, level, Z_DEFAULT_STRATEGY) != Z_OK)
+		die(_("unable to set compression level %d"), level);
+
+	for (;;) {
+		char buf[BUFFERSIZE];
+		ssize_t read_bytes = xread(0, buf, sizeof(buf));
+		if (read_bytes < 0)
+			die_errno(_("unable to read from stdin"));
+		if (read_bytes == 0)
+			break;
+		if (gzwrite(gz, buf, read_bytes) != read_bytes)
+			die(_("gzwrite failed"));
+	}
+
+	if (gzclose(gz) != Z_OK)
+		die(_("gzclose failed"));
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index 3a9af104b5..755848842c 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -99,6 +99,7 @@ git-gc                                  mainporcelain
 git-get-tar-commit-id                   plumbinginterrogators
 git-grep                                mainporcelain           info
 git-gui                                 mainporcelain
+git-gzip                                purehelpers
 git-hash-object                         plumbingmanipulators
 git-help                                ancillaryinterrogators          complete
 git-http-backend                        synchingrepositories
diff --git a/git.c b/git.c
index 50da125c60..48f7fc6c56 100644
--- a/git.c
+++ b/git.c
@@ -510,6 +510,7 @@ static struct cmd_struct commands[] = {
 	{ "gc", cmd_gc, RUN_SETUP },
 	{ "get-tar-commit-id", cmd_get_tar_commit_id, NO_PARSEOPT },
 	{ "grep", cmd_grep, RUN_SETUP_GENTLY },
+	{ "gzip", cmd_gzip },
 	{ "hash-object", cmd_hash_object },
 	{ "help", cmd_help },
 	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
--
2.21.0

  reply	other threads:[~2019-04-27 17:40 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-12 23:04 [PATCH 0/2] Avoid spawning gzip in git archive Johannes Schindelin via GitGitGadget
2019-04-12 23:04 ` [PATCH 1/2] archive: replace write_or_die() calls with write_block_or_die() Rohit Ashiwal via GitGitGadget
2019-04-13  1:34   ` Jeff King
2019-04-13  5:51     ` Junio C Hamano
2019-04-14  4:36       ` Rohit Ashiwal
2019-04-26 14:29       ` Johannes Schindelin
2019-04-26 23:44         ` Junio C Hamano
2019-04-29 21:32           ` Johannes Schindelin
2019-05-01 18:09             ` Jeff King
2019-05-02 20:29               ` René Scharfe
2019-05-05  5:25               ` Junio C Hamano
2019-05-06  5:07                 ` Jeff King
2019-04-14  4:34     ` Rohit Ashiwal
2019-04-14 10:33       ` Junio C Hamano
2019-04-26 14:28     ` Johannes Schindelin
2019-05-01 18:07       ` Jeff King
2019-04-12 23:04 ` [PATCH 2/2] archive: avoid spawning `gzip` Rohit Ashiwal via GitGitGadget
2019-04-13  1:51   ` Jeff King
2019-04-13 22:01     ` René Scharfe
2019-04-15 21:35       ` Jeff King
2019-04-26 14:51         ` Johannes Schindelin
2019-04-27  9:59           ` René Scharfe
2019-04-27 17:39             ` René Scharfe [this message]
2019-04-29 21:25               ` Johannes Schindelin
2019-05-01 17:45                 ` René Scharfe
2019-05-01 18:18                   ` Jeff King
2019-06-10 10:44                     ` René Scharfe
2019-06-13 19:16                       ` Jeff King
2019-04-13 22:16     ` brian m. carlson
2019-04-15 21:36       ` Jeff King
2019-04-26 14:54       ` Johannes Schindelin
2019-05-02 20:20         ` Ævar Arnfjörð Bjarmason
2019-05-03 20:49           ` Johannes Schindelin
2019-05-03 20:52             ` Jeff King
2019-04-26 14:47     ` Johannes Schindelin
     [not found] ` <pull.145.v2.git.gitgitgadget@gmail.com>
     [not found]   ` <4ea94a8784876c3a19e387537edd81a957fc692c.1556321244.git.gitgitgadget@gmail.com>
2019-05-02 20:29     ` [PATCH v2 3/4] archive: optionally use zlib directly for gzip compression René Scharfe
     [not found]   ` <ac2b2488a1b42b3caf8a84594c48eca796748e59.1556321244.git.gitgitgadget@gmail.com>
2019-05-02 20:30     ` [PATCH v2 2/4] archive-tar: mark RECORDSIZE/BLOCKSIZE as unsigned René Scharfe
2019-05-08 11:45       ` Johannes Schindelin
2019-05-08 23:04         ` Jeff King
2019-05-09 14:06           ` Johannes Schindelin
2019-05-09 18:38             ` Jeff King
2019-05-10 17:18               ` René Scharfe
2019-05-10 21:20                 ` Jeff King
2022-06-12  6:00 ` [PATCH v3 0/5] Avoid spawning gzip in git archive René Scharfe
2022-06-12  6:03   ` [PATCH v3 1/5] archive: rename archiver data field to filter_command René Scharfe
2022-06-12  6:05   ` [PATCH v3 2/5] archive-tar: factor out write_block() René Scharfe
2022-06-12  6:08   ` [PATCH v3 3/5] archive-tar: add internal gzip implementation René Scharfe
2022-06-13 19:10     ` Junio C Hamano
2022-06-12  6:18   ` [PATCH v3 4/5] archive-tar: use OS_CODE 3 (Unix) for internal gzip René Scharfe
2022-06-12  6:19   ` [PATCH v3 5/5] archive-tar: use internal gzip by default René Scharfe
2022-06-13 21:55     ` Junio C Hamano
2022-06-14 11:27       ` Johannes Schindelin
2022-06-14 15:47         ` René Scharfe
2022-06-14 15:56           ` René Scharfe
2022-06-14 16:29           ` Johannes Schindelin
2022-06-14 20:04             ` René Scharfe
2022-06-15 16:41               ` Junio C Hamano
2022-06-14 11:28   ` [PATCH v3 0/5] Avoid spawning gzip in git archive Johannes Schindelin
2022-06-14 20:05     ` René Scharfe
2022-06-30 18:55       ` Johannes Schindelin
2022-07-01 16:05         ` Johannes Schindelin
2022-07-01 16:27           ` Jeff King
2022-07-01 17:47             ` Junio C Hamano
2022-06-15 16:53 ` [PATCH v4 0/6] " René Scharfe
2022-06-15 16:58   ` [PATCH v4 1/6] archive: update format documentation René Scharfe
2022-06-15 16:59   ` [PATCH v4 2/6] archive: rename archiver data field to filter_command René Scharfe
2022-06-15 17:01   ` [PATCH v4 3/6] archive-tar: factor out write_block() René Scharfe
2022-06-15 17:02   ` [PATCH v4 4/6] archive-tar: add internal gzip implementation René Scharfe
2022-06-15 20:32     ` Ævar Arnfjörð Bjarmason
2022-06-16 18:55       ` René Scharfe
2022-06-24 11:13         ` Ævar Arnfjörð Bjarmason
2022-06-24 20:24           ` René Scharfe
2022-06-15 17:04   ` [PATCH v4 5/6] archive-tar: use OS_CODE 3 (Unix) for internal gzip René Scharfe
2022-06-15 17:05   ` [PATCH v4 6/6] archive-tar: use internal gzip by default René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45afd432-9e45-ea76-aa1b-e8cd1264e3a0@web.de \
    --to=l.s.r@web.de \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    --cc=rohit.ashiwal265@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).