All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: Ramkumar Ramachandra <artagnon@gmail.com>
Cc: Git Mailing List <git@vger.kernel.org>,
	David Michael Barr <david.barr@cordelta.com>,
	Sverre Rabbelier <srabbelier@gmail.com>,
	Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 06/10] Add stream helper library
Date: Mon, 9 Aug 2010 17:39:43 -0500	[thread overview]
Message-ID: <20100809223943.GB4429@burratino> (raw)
In-Reply-To: <20100809215719.GA4203@burratino>

From: David Barr <david.barr@cordelta.com>

This library provides thread-unsafe fgets()- and fread()-like
functions where the caller does not have to supply a buffer.  It
maintains a couple of static buffers and provides an API to use
them.

[rr: allow input from files other than stdin]
[jn: with tests, documentation, and error handling improvements]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
New tests and API docs.  The return value from buffer_deinit
can be used to check for errors now (I found this useful when
writing tests).

 .gitignore              |    1 +
 Makefile                |    8 +++-
 t/t0080-vcs-svn.sh      |   54 ++++++++++++++++++++++++++
 test-line-buffer.c      |   46 ++++++++++++++++++++++
 vcs-svn/line_buffer.c   |   97 +++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/line_buffer.h   |   12 ++++++
 vcs-svn/line_buffer.txt |   58 ++++++++++++++++++++++++++++
 7 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 test-line-buffer.c
 create mode 100644 vcs-svn/line_buffer.c
 create mode 100644 vcs-svn/line_buffer.h
 create mode 100644 vcs-svn/line_buffer.txt

diff --git a/.gitignore b/.gitignore
index 9f109db..8c0512e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,6 +166,7 @@
 /test-dump-cache-tree
 /test-genrandom
 /test-index-version
+/test-line-buffer
 /test-match-trees
 /test-obj-pool
 /test-parse-options
diff --git a/Makefile b/Makefile
index 24103c9..a76cce5 100644
--- a/Makefile
+++ b/Makefile
@@ -408,6 +408,7 @@ TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-genrandom
+TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
 TEST_PROGRAMS_NEED_X += test-obj-pool
 TEST_PROGRAMS_NEED_X += test-parse-options
@@ -1743,7 +1744,7 @@ ifndef NO_CURL
 endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
-VCSSVN_OBJS = vcs-svn/string_pool.o
+VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1867,7 +1868,8 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS): \
-	vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
+	vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
+	vcs-svn/line_buffer.h
 endif
 
 exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@@ -2016,6 +2018,8 @@ test-date$X: date.o ctype.o
 
 test-delta$X: diff-delta.o patch-delta.o
 
+test-line-buffer$X: vcs-svn/lib.a
+
 test-parse-options$X: parse-options.o
 
 test-string-pool$X: vcs-svn/lib.a
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
index 99a314b..d3225ad 100755
--- a/t/t0080-vcs-svn.sh
+++ b/t/t0080-vcs-svn.sh
@@ -76,6 +76,60 @@ test_expect_success 'obj pool: high-water mark' '
 	test_cmp expected actual
 '
 
+test_expect_success 'line buffer' '
+	echo HELLO >expected1 &&
+	printf "%s\n" "" HELLO >expected2 &&
+	echo >expected3 &&
+	printf "%s\n" "" Q | q_to_nul >expected4 &&
+	printf "%s\n" foo "" >expected5 &&
+	printf "%s\n" "" foo >expected6 &&
+
+	test-line-buffer <<-\EOF >actual1 &&
+	5
+	HELLO
+	EOF
+
+	test-line-buffer <<-\EOF >actual2 &&
+	0
+
+	5
+	HELLO
+	EOF
+
+	q_to_nul <<-\EOF |
+	1
+	Q
+	EOF
+	test-line-buffer >actual3 &&
+
+	q_to_nul <<-\EOF |
+	0
+
+	1
+	Q
+	EOF
+	test-line-buffer >actual4 &&
+
+	test-line-buffer <<-\EOF >actual5 &&
+	5
+	foo
+	EOF
+
+	test-line-buffer <<-\EOF >actual6 &&
+	0
+
+	5
+	foo
+	EOF
+
+	test_cmp expected1 actual1 &&
+	test_cmp expected2 actual2 &&
+	test_cmp expected3 actual3 &&
+	test_cmp expected4 actual4 &&
+	test_cmp expected5 actual5 &&
+	test_cmp expected6 actual6
+'
+
 test_expect_success 'string pool' '
 	echo a does not equal b >expected.differ &&
 	echo a equals a >expected.match &&
diff --git a/test-line-buffer.c b/test-line-buffer.c
new file mode 100644
index 0000000..c11bf7f
--- /dev/null
+++ b/test-line-buffer.c
@@ -0,0 +1,46 @@
+/*
+ * test-line-buffer.c: code to exercise the svn importer's input helper
+ *
+ * Input format:
+ *	number NL
+ *	(number bytes) NL
+ *	number NL
+ *	...
+ */
+
+#include "git-compat-util.h"
+#include "vcs-svn/line_buffer.h"
+
+static uint32_t strtouint32(const char *s)
+{
+	char *end;
+	uintmax_t n = strtoumax(s, &end, 10);
+	if (*s == '\0' || *end != '\0')
+		die("invalid count: %s", s);
+	return (uint32_t) n;
+}
+
+int main(int argc, char *argv[])
+{
+	char *s;
+
+	if (argc != 1)
+		usage("test-line-buffer < input.txt");
+	if (buffer_init(NULL))
+		die_errno("open error");
+	while ((s = buffer_read_line())) {
+		s = buffer_read_string(strtouint32(s));
+		fputs(s, stdout);
+		fputc('\n', stdout);
+		buffer_skip_bytes(1);
+		if (!(s = buffer_read_line()))
+			break;
+		buffer_copy_bytes(strtouint32(s) + 1);
+	}
+	if (buffer_deinit())
+		die("input error");
+	if (ferror(stdout))
+		die("output error");
+	buffer_reset();
+	return 0;
+}
diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
new file mode 100644
index 0000000..1543567
--- /dev/null
+++ b/vcs-svn/line_buffer.c
@@ -0,0 +1,97 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+
+#define LINE_BUFFER_LEN 10000
+#define COPY_BUFFER_LEN 4096
+
+/* Create memory pool for char sequence of known length */
+obj_pool_gen(blob, char, 4096)
+
+static char line_buffer[LINE_BUFFER_LEN];
+static char byte_buffer[COPY_BUFFER_LEN];
+static FILE *infile;
+
+int buffer_init(const char *filename)
+{
+	infile = filename ? fopen(filename, "r") : stdin;
+	if (!infile)
+		return -1;
+	return 0;
+}
+
+int buffer_deinit(void)
+{
+	int err;
+	if (infile == stdin)
+		return ferror(infile);
+	err = ferror(infile);
+	err |= fclose(infile);
+	return err;
+}
+
+/* Read a line without trailing newline. */
+char *buffer_read_line(void)
+{
+	char *end;
+	if (!fgets(line_buffer, sizeof(line_buffer), infile))
+		/* Error or data exhausted. */
+		return NULL;
+	end = line_buffer + strlen(line_buffer);
+	if (end[-1] == '\n')
+		end[-1] = '\0';
+	else if (feof(infile))
+		; /* No newline at end of file.  That's fine. */
+	else
+		/*
+		 * Line was too long.
+		 * There is probably a saner way to deal with this,
+		 * but for now let's return an error.
+		 */
+		return NULL;
+	return line_buffer;
+}
+
+char *buffer_read_string(uint32_t len)
+{
+	char *s;
+	blob_free(blob_pool.size);
+	s = blob_pointer(blob_alloc(len + 1));
+	s[fread(s, 1, len, infile)] = '\0';
+	return ferror(infile) ? NULL : s;
+}
+
+void buffer_copy_bytes(uint32_t len)
+{
+	uint32_t in;
+	while (len > 0 && !feof(infile) && !ferror(infile)) {
+		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+		in = fread(byte_buffer, 1, in, infile);
+		len -= in;
+		fwrite(byte_buffer, 1, in, stdout);
+		if (ferror(stdout)) {
+			buffer_skip_bytes(len);
+			return;
+		}
+	}
+}
+
+void buffer_skip_bytes(uint32_t len)
+{
+	uint32_t in;
+	while (len > 0 && !feof(infile) && !ferror(infile)) {
+		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+		in = fread(byte_buffer, 1, in, infile);
+		len -= in;
+	}
+}
+
+void buffer_reset(void)
+{
+	blob_reset();
+}
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
new file mode 100644
index 0000000..9c78ae1
--- /dev/null
+++ b/vcs-svn/line_buffer.h
@@ -0,0 +1,12 @@
+#ifndef LINE_BUFFER_H_
+#define LINE_BUFFER_H_
+
+int buffer_init(const char *filename);
+int buffer_deinit(void);
+char *buffer_read_line(void);
+char *buffer_read_string(uint32_t len);
+void buffer_copy_bytes(uint32_t len);
+void buffer_skip_bytes(uint32_t len);
+void buffer_reset(void);
+
+#endif
diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt
new file mode 100644
index 0000000..8906fb1
--- /dev/null
+++ b/vcs-svn/line_buffer.txt
@@ -0,0 +1,58 @@
+line_buffer API
+===============
+
+The line_buffer library provides a convenient interface for
+mostly-line-oriented input.
+
+Each line is not permitted to exceed 10000 bytes.  The provided
+functions are not thread-safe or async-signal-safe, and like
+`fgets()`, they generally do not function correctly if interrupted
+by a signal without SA_RESTART set.
+
+Calling sequence
+----------------
+
+The calling program:
+
+ - specifies a file to read with `buffer_init`
+ - processes input with `buffer_read_line`, `buffer_read_string`,
+   `buffer_skip_bytes`, and `buffer_copy_bytes`
+ - closes the file with `buffer_deinit`, perhaps to start over and
+   read another file.
+
+Before exiting, the caller can use `buffer_reset` to deallocate
+resources for the benefit of profiling tools.
+
+Functions
+---------
+
+`buffer_init`::
+	Open the named file for input.  If filename is NULL,
+	start reading from stdin.  On failure, returns -1 (with
+	errno indicating the nature of the failure).
+
+`buffer_deinit`::
+	Stop reading from the current file (closing it unless
+	it was stdin).  Returns nonzero if `fclose` fails or
+	the error indicator was set.
+
+`buffer_read_line`::
+	Read a line and strip off the trailing newline.
+	On failure or end of file, returns NULL.
+
+`buffer_read_string`::
+	Read `len` characters of input or up to the end of the
+	file, whichever comes first.  Returns NULL on error.
+	Returns whatever characters were read (possibly "")
+	for end of file.
+
+`buffer_copy_bytes`::
+	Read `len` bytes of input and dump them to the standard output
+	stream.  Returns early for error or end of file.
+
+`buffer_skip_bytes`::
+	Discards `len` bytes from the input stream (stopping early
+	if necessary because of an error or eof).
+
+`buffer_reset`::
+	Deallocates non-static buffers.
-- 
1.7.2.1.544.ga752d.dirty

  parent reply	other threads:[~2010-08-09 22:41 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-15 16:22 [PATCH 0/8] Resurrect rr/svn-export Ramkumar Ramachandra
2010-07-15 16:22 ` [PATCH 1/8] Export parse_date_basic() to convert a date string to timestamp Ramkumar Ramachandra
2010-07-15 17:25   ` Jonathan Nieder
2010-07-15 22:54     ` Junio C Hamano
2010-07-15 16:22 ` [PATCH 2/8] Introduce vcs-svn lib Ramkumar Ramachandra
2010-07-15 17:46   ` Jonathan Nieder
2010-07-15 19:15     ` Ramkumar Ramachandra
2010-07-15 16:22 ` [PATCH 3/8] Add memory pool library Ramkumar Ramachandra
2010-07-15 18:57   ` Jonathan Nieder
2010-07-15 19:12     ` Ramkumar Ramachandra
2010-07-15 16:23 ` [PATCH 4/8] Add treap implementation Ramkumar Ramachandra
2010-07-15 19:09   ` Jonathan Nieder
2010-07-15 19:18     ` Ramkumar Ramachandra
2010-07-15 16:23 ` [PATCH 5/8] Add string-specific memory pool Ramkumar Ramachandra
2010-07-15 16:23 ` [PATCH 6/8] Add stream helper library Ramkumar Ramachandra
2010-07-15 19:19   ` Jonathan Nieder
2010-07-15 16:23 ` [PATCH 7/8] Add infrastructure to write revisions in fast-export format Ramkumar Ramachandra
2010-07-15 19:28   ` Jonathan Nieder
2010-07-15 16:23 ` [PATCH 8/8] Add SVN dump parser Ramkumar Ramachandra
2010-07-15 19:52   ` Jonathan Nieder
2010-07-15 20:04     ` Jonathan Nieder
2010-07-16 10:13 ` [PATCH 0/8] Resurrect rr/svn-export Jonathan Nieder
2010-07-16 10:16   ` [PATCH 3/9] Add memory pool library Jonathan Nieder
2010-07-16 10:23   ` [PATCH 4/9] Add treap implementation Jonathan Nieder
2010-07-16 18:26     ` Jonathan Nieder
2010-08-09 21:57   ` [PATCH 0/10] rr/svn-export reroll Jonathan Nieder
2010-08-09 22:01     ` [PATCH 01/10] Export parse_date_basic() to convert a date string to timestamp Jonathan Nieder
2010-08-09 22:04     ` [PATCH 02/10] Introduce vcs-svn lib Jonathan Nieder
2010-08-09 22:11     ` [PATCH 03/10] Add memory pool library Jonathan Nieder
2010-08-09 22:17     ` [PATCH 04/10] Add treap implementation Jonathan Nieder
2010-08-12 17:22       ` Junio C Hamano
2010-08-12 22:02         ` Jonathan Nieder
2010-08-12 22:11         ` Jonathan Nieder
2010-08-12 22:44           ` Junio C Hamano
2010-08-09 22:34     ` [PATCH 05/10] Add string-specific memory pool Jonathan Nieder
2010-08-12 17:22       ` Junio C Hamano
2010-08-12 21:30         ` Jonathan Nieder
2010-08-09 22:39     ` Jonathan Nieder [this message]
2010-08-09 22:48     ` [PATCH 07/10] Infrastructure to write revisions in fast-export format Jonathan Nieder
2010-08-09 22:55     ` [PATCH 08/10] SVN dump parser Jonathan Nieder
2010-08-12 17:22       ` Junio C Hamano
2010-08-09 22:55     ` PATCH 09/10] Update svn-fe manual Jonathan Nieder
2010-08-09 22:58     ` [PATCH 10/10] svn-fe manual: Clarify warning about deltas in dump files Jonathan Nieder
2010-08-10 12:53     ` [PATCH 0/10] rr/svn-export reroll Ramkumar Ramachandra
2010-08-11  1:53       ` Jonathan Nieder
2010-10-11  2:34       ` [PATCH/WIP 00/16] svn delta applier Jonathan Nieder
2010-10-11  2:37         ` [PATCH 01/16] vcs-svn: Eliminate global byte_buffer[] array Jonathan Nieder
2010-10-11  2:39         ` [PATCH 03/16] vcs-svn: Collect line_buffer data in a struct Jonathan Nieder
2010-10-11  2:41         ` [PATCH 04/16] vcs-svn: Teach line_buffer to handle multiple input files Jonathan Nieder
2010-10-11  2:44         ` [PATCH 05/16] vcs-svn: Make buffer_skip_bytes() report partial reads Jonathan Nieder
2010-10-11  2:46         ` [PATCH 06/16] vcs-svn: Improve support for reading large files Jonathan Nieder
2010-10-11  2:47         ` [PATCH 07/16] vcs-svn: Add binary-safe read() function Jonathan Nieder
2010-10-11  2:47         ` [PATCH 08/16] vcs-svn: Let callers peek ahead to find stream end Jonathan Nieder
2010-10-11  2:51         ` [PATCH 09/16] vcs-svn: Allow input errors to be detected early Jonathan Nieder
2010-10-11  2:52         ` [PATCH 10/16] vcs-svn: Allow character-oriented input Jonathan Nieder
2010-10-11  2:53         ` [PATCH 11/16] vcs-svn: Add code to maintain a sliding view of a file Jonathan Nieder
2010-10-11  2:55         ` [PATCH 12/16] vcs-svn: Learn to parse variable-length integers Jonathan Nieder
2010-10-11  2:58         ` [PATCH 13/16] vcs-svn: Learn to check for SVN\0 magic Jonathan Nieder
2010-10-11  2:59         ` [PATCH 14/16] compat: helper for detecting unsigned overflow Jonathan Nieder
2010-10-11  3:00         ` [PATCH 15/16] t9010 (svn-fe): Eliminate dependency on svn perl bindings Jonathan Nieder
2010-10-11  3:11         ` [PATCH 02/16] vcs-svn: Replace buffer_read_string() memory pool with a strbuf Jonathan Nieder
2010-10-11  4:01         ` [PATCH/RFC 16'/16] vcs-svn: Add svn delta parser Jonathan Nieder
2010-10-13  9:17           ` [PATCH/RFC 0/11] Building up the " Jonathan Nieder
2010-10-13  9:19             ` [PATCH 01/11] fixup! vcs-svn: Learn to parse variable-length integers Jonathan Nieder
2010-10-13  9:21             ` [PATCH 02/11] vcs-svn: Skeleton of an svn delta parser Jonathan Nieder
2010-10-13  9:30             ` [PATCH 03/11] vcs-svn: Read the preimage while applying deltas Jonathan Nieder
2010-10-14 21:45               ` Sam Vilain
2010-10-14 23:40                 ` Jonathan Nieder
2010-10-13  9:35             ` [PATCH 04/11] vcs-svn: Read inline data from deltas Jonathan Nieder
2010-10-13  9:38             ` [PATCH 05/11] vcs-svn: Read instructions " Jonathan Nieder
2010-10-13  9:39             ` [PATCH 06/11] vcs-svn: Implement copyfrom_data delta instruction Jonathan Nieder
2010-10-13  9:41             ` [PATCH 07/11] vcs-svn: Check declared number of output bytes Jonathan Nieder
2010-10-13  9:48             ` [PATCH 08/11] vcs-svn: Reject deltas that do not consume all inline data Jonathan Nieder
2010-10-13  9:50             ` [PATCH 09/11] vcs-svn: Let deltas use data from postimage Jonathan Nieder
2010-10-13  9:53             ` [PATCH 10/11] vcs-svn: Reject deltas that read past end of preimage Jonathan Nieder
2010-10-13  9:58             ` [PATCH 11/11] vcs-svn: Allow deltas to copy from preimage Jonathan Nieder
2010-10-13 10:00             ` Jonathan Nieder
2010-10-18 17:00             ` [PATCH/RFC 0/11] Building up the delta parser Ramkumar Ramachandra
2010-10-18 17:03               ` Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100809223943.GB4429@burratino \
    --to=jrnieder@gmail.com \
    --cc=artagnon@gmail.com \
    --cc=david.barr@cordelta.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=srabbelier@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.