All of lore.kernel.org
 help / color / mirror / Atom feed
* vcs-svn: purge obsolete data structures and code
@ 2011-03-19  7:03 David Barr
  2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
                   ` (10 more replies)
  0 siblings, 11 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

Patches 1-8:
Now that vcs-svn delegates tracking the repository structure to
fast-import, reorganise the code and drop the internal data
structures and supporting code.

Patch 9:
One final optimisation jumps out after the clean-up; apply it.

 .gitignore              |    3 -
 Makefile                |   13 +--
 t/t0080-vcs-svn.sh      |  117 ---------------------
 test-obj-pool.c         |  116 ---------------------
 test-string-pool.c      |   31 ------
 test-treap.c            |   70 -------------
 vcs-svn/LICENSE         |    3 -
 vcs-svn/fast_export.c   |   64 ++++++------
 vcs-svn/fast_export.h   |   14 ++--
 vcs-svn/obj_pool.h      |   61 -----------
 vcs-svn/repo_tree.c     |   36 ++-----
 vcs-svn/repo_tree.h     |   12 +--
 vcs-svn/string_pool.c   |  113 --------------------
 vcs-svn/string_pool.h   |   12 --
 vcs-svn/string_pool.txt |   43 --------
 vcs-svn/svndump.c       |  260 ++++++++++++++++++++++++++---------------------
 vcs-svn/trp.h           |  237 ------------------------------------------
 vcs-svn/trp.txt         |  109 --------------------
 18 files changed, 197 insertions(+), 1117 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH 1/9] vcs-svn: pass paths through to fast-import
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  7:50   ` Jonathan Nieder
  2011-03-19  7:03 ` [PATCH 2/9] vcs-svn: avoid using ls command twice David Barr
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Now that there is no internal representation of the repo,
it is not necessary to tokenise paths.

Use strbuf instead and bypass string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/fast_export.c |   47 ++++++++++++++++++------------------
 vcs-svn/fast_export.h |    9 +++----
 vcs-svn/repo_tree.c   |   20 +++++++-------
 vcs-svn/repo_tree.h   |   13 ++++------
 vcs-svn/svndump.c     |   63 +++++++++++++++++++++----------------------------
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index f19db9a..bb5e9aa 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -4,6 +4,8 @@
  */
 
 #include "git-compat-util.h"
+#include "strbuf.h"
+#include "quote.h"
 #include "fast_export.h"
 #include "line_buffer.h"
 #include "repo_tree.h"
@@ -32,30 +34,30 @@ void fast_export_reset(void)
 	buffer_reset(&report_buffer);
 }
 
-void fast_export_delete(uint32_t depth, const uint32_t *path)
+void fast_export_delete(const char *path)
 {
-	printf("D \"");
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	putchar('D');
+	putchar(' ');
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 }
 
-static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode)
+static void fast_export_truncate(const char *path, uint32_t mode)
 {
-	fast_export_modify(depth, path, mode, "inline");
+	fast_export_modify(path, mode, "inline");
 	printf("data 0\n\n");
 }
 
-void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode,
-			const char *dataref)
+void fast_export_modify(const char *path, uint32_t mode, const char *dataref)
 {
 	/* Mode must be 100644, 100755, 120000, or 160000. */
 	if (!dataref) {
-		fast_export_truncate(depth, path, mode);
+		fast_export_truncate(path, mode);
 		return;
 	}
-	printf("M %06"PRIo32" %s \"", mode, dataref);
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	printf("M %06"PRIo32" %s ", mode, dataref);
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 }
 
 static char gitsvnline[MAX_GITSVN_LINE_LEN];
@@ -93,20 +95,20 @@ void fast_export_end_commit(uint32_t revision)
 	printf("progress Imported commit %"PRIu32".\n\n", revision);
 }
 
-static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path)
+static void ls_from_rev(uint32_t rev, const char *path)
 {
 	/* ls :5 path/to/old/file */
-	printf("ls :%"PRIu32" \"", rev);
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	printf("ls :%"PRIu32" ", rev);
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 	fflush(stdout);
 }
 
-static void ls_from_active_commit(uint32_t depth, const uint32_t *path)
+static void ls_from_active_commit(const char *path)
 {
 	/* ls "path/to/file" */
 	printf("ls \"");
-	pool_print_seq_q(depth, path, '/', stdout);
+	quote_c_style(path, NULL, stdout, 1);
 	printf("\"\n");
 	fflush(stdout);
 }
@@ -174,16 +176,15 @@ static int parse_ls_response(const char *response, uint32_t *mode,
 	return 0;
 }
 
-int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+int fast_export_ls_rev(uint32_t rev, const char *path,
 				uint32_t *mode, struct strbuf *dataref)
 {
-	ls_from_rev(rev, depth, path);
+	ls_from_rev(rev, path);
 	return parse_ls_response(get_response_line(), mode, dataref);
 }
 
-int fast_export_ls(uint32_t depth, const uint32_t *path,
-				uint32_t *mode, struct strbuf *dataref)
+int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
 {
-	ls_from_active_commit(depth, path);
+	ls_from_active_commit(path);
 	return parse_ls_response(get_response_line(), mode, dataref);
 }
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index 633d219..a47c609 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -8,18 +8,17 @@ void fast_export_init(int fd);
 void fast_export_deinit(void);
 void fast_export_reset(void);
 
-void fast_export_delete(uint32_t depth, const uint32_t *path);
-void fast_export_modify(uint32_t depth, const uint32_t *path,
-			uint32_t mode, const char *dataref);
+void fast_export_delete(const char *path);
+void fast_export_modify(const char *path, uint32_t mode, const char *dataref);
 void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
 			uint32_t uuid, uint32_t url, unsigned long timestamp);
 void fast_export_end_commit(uint32_t revision);
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
 
 /* If there is no such file at that rev, returns -1, errno == ENOENT. */
-int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+int fast_export_ls_rev(uint32_t rev, const char *path,
 			uint32_t *mode_out, struct strbuf *dataref_out);
-int fast_export_ls(uint32_t depth, const uint32_t *path,
+int fast_export_ls(const char *path,
 			uint32_t *mode_out, struct strbuf *dataref_out);
 
 #endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
index e75f580..f2466bc 100644
--- a/vcs-svn/repo_tree.c
+++ b/vcs-svn/repo_tree.c
@@ -8,14 +8,14 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 
-const char *repo_read_path(const uint32_t *path)
+const char *repo_read_path(const char *path)
 {
 	int err;
 	uint32_t dummy;
 	static struct strbuf buf = STRBUF_INIT;
 
 	strbuf_reset(&buf);
-	err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &dummy, &buf);
+	err = fast_export_ls(path, &dummy, &buf);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
@@ -24,14 +24,14 @@ const char *repo_read_path(const uint32_t *path)
 	return buf.buf;
 }
 
-uint32_t repo_read_mode(const uint32_t *path)
+uint32_t repo_read_mode(const char *path)
 {
 	int err;
 	uint32_t result;
 	static struct strbuf dummy = STRBUF_INIT;
 
 	strbuf_reset(&dummy);
-	err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &result, &dummy);
+	err = fast_export_ls(path, &result, &dummy);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
@@ -41,24 +41,24 @@ uint32_t repo_read_mode(const uint32_t *path)
 	return result;
 }
 
-void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst)
+void repo_copy(uint32_t revision, const char *src, const char *dst)
 {
 	int err;
 	uint32_t mode;
 	static struct strbuf data = STRBUF_INIT;
 
 	strbuf_reset(&data);
-	err = fast_export_ls_rev(revision, REPO_MAX_PATH_DEPTH, src, &mode, &data);
+	err = fast_export_ls_rev(revision, src, &mode, &data);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls_rev error");
-		fast_export_delete(REPO_MAX_PATH_DEPTH, dst);
+		fast_export_delete(dst);
 		return;
 	}
-	fast_export_modify(REPO_MAX_PATH_DEPTH, dst, mode, data.buf);
+	fast_export_modify(dst, mode, data.buf);
 }
 
-void repo_delete(uint32_t *path)
+void repo_delete(const char *path)
 {
-	fast_export_delete(REPO_MAX_PATH_DEPTH, path);
+	fast_export_delete(path);
 }
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index d690784..af2415c 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -8,15 +8,12 @@
 #define REPO_MODE_EXE 0100755
 #define REPO_MODE_LNK 0120000
 
-#define REPO_MAX_PATH_LEN 4096
-#define REPO_MAX_PATH_DEPTH 1000
-
 uint32_t next_blob_mark(void);
-void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst);
-void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
-const char *repo_read_path(const uint32_t *path);
-uint32_t repo_read_mode(const uint32_t *path);
-void repo_delete(uint32_t *path);
+void repo_copy(uint32_t revision, const char *src, const char *dst);
+void repo_add(const char *path, uint32_t mode, uint32_t blob_mark);
+const char *repo_read_path(const char *path);
+uint32_t repo_read_mode(const char *path);
+void repo_delete(const char *path);
 void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
 		 uint32_t url, long unsigned timestamp);
 void repo_diff(uint32_t r1, uint32_t r2);
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 7ecb227..afdfc63 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -11,8 +11,8 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 #include "line_buffer.h"
-#include "obj_pool.h"
 #include "string_pool.h"
+#include "strbuf.h"
 
 #define REPORT_FILENO 3
 
@@ -31,32 +31,20 @@
 #define LENGTH_UNKNOWN (~0)
 #define DATE_RFC2822_LEN 31
 
-/* Create memory pool for log messages */
-obj_pool_gen(log, char, 4096)
-
 static struct line_buffer input = LINE_BUFFER_INIT;
 
 #define REPORT_FILENO 3
 
-static char *log_copy(uint32_t length, const char *log)
-{
-	char *buffer;
-	log_free(log_pool.size);
-	buffer = log_pointer(log_alloc(length));
-	strncpy(buffer, log, length);
-	return buffer;
-}
-
 static struct {
 	uint32_t action, propLength, textLength, srcRev, type;
-	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+	struct strbuf src, dst;
 	uint32_t text_delta, prop_delta;
 } node_ctx;
 
 static struct {
 	uint32_t revision, author;
 	unsigned long timestamp;
-	char *log;
+	struct strbuf log;
 } rev_ctx;
 
 static struct {
@@ -78,9 +66,11 @@ static void reset_node_ctx(char *fname)
 	node_ctx.action = NODEACT_UNKNOWN;
 	node_ctx.propLength = LENGTH_UNKNOWN;
 	node_ctx.textLength = LENGTH_UNKNOWN;
-	node_ctx.src[0] = ~0;
+	strbuf_reset(&node_ctx.src);
 	node_ctx.srcRev = 0;
-	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+	strbuf_reset(&node_ctx.dst);
+	if (fname)
+		strbuf_addstr(&node_ctx.dst, fname);
 	node_ctx.text_delta = 0;
 	node_ctx.prop_delta = 0;
 }
@@ -89,7 +79,7 @@ static void reset_rev_ctx(uint32_t revision)
 {
 	rev_ctx.revision = revision;
 	rev_ctx.timestamp = 0;
-	rev_ctx.log = NULL;
+	strbuf_reset(&rev_ctx.log);
 	rev_ctx.author = ~0;
 }
 
@@ -130,7 +120,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 		if (!val)
 			die("invalid dump: unsets svn:log");
 		/* Value length excludes terminating nul. */
-		rev_ctx.log = log_copy(len + 1, val);
+		strbuf_add(&rev_ctx.log, val, len + 1);
 	} else if (key == keys.svn_author) {
 		rev_ctx.author = pool_intern(val);
 	} else if (key == keys.svn_date) {
@@ -223,14 +213,14 @@ static void handle_node(void)
 		if (have_text || have_props || node_ctx.srcRev)
 			die("invalid dump: deletion node has "
 				"copyfrom info, text, or properties");
-		return repo_delete(node_ctx.dst);
+		return repo_delete(node_ctx.dst.buf);
 	}
 	if (node_ctx.action == NODEACT_REPLACE) {
-		repo_delete(node_ctx.dst);
+		repo_delete(node_ctx.dst.buf);
 		node_ctx.action = NODEACT_ADD;
 	}
 	if (node_ctx.srcRev) {
-		repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+		repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
 		if (node_ctx.action == NODEACT_ADD)
 			node_ctx.action = NODEACT_CHANGE;
 	}
@@ -240,14 +230,14 @@ static void handle_node(void)
 	/*
 	 * Find old content (old_data) and decide on the new mode.
 	 */
-	if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) {
+	if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
 		if (type != REPO_MODE_DIR)
 			die("invalid dump: root of tree is not a regular file");
 		old_data = NULL;
 	} else if (node_ctx.action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node_ctx.dst);
-		mode = repo_read_mode(node_ctx.dst);
+		old_data = repo_read_path(node_ctx.dst.buf);
+		mode = repo_read_mode(node_ctx.dst.buf);
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 			die("invalid dump: cannot modify a directory into a file");
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
@@ -284,12 +274,10 @@ static void handle_node(void)
 		/* For the fast_export_* functions, NULL means empty. */
 		old_data = NULL;
 	if (!have_text) {
-		fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
-					node_ctx.type, old_data);
+		fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
 		return;
 	}
-	fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
-				node_ctx.type, "inline");
+	fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
 	fast_export_data(node_ctx.type, node_ctx.textLength, &input);
 }
 
@@ -297,7 +285,7 @@ static void begin_revision(void)
 {
 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
 		return;
-	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf,
 		dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
 }
 
@@ -368,7 +356,8 @@ void svndump_read(const char *url)
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
 		} else if (key == keys.node_copyfrom_path) {
-			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+			strbuf_reset(&node_ctx.src);
+			strbuf_addstr(&node_ctx.src, val);
 		} else if (key == keys.node_copyfrom_rev) {
 			node_ctx.srcRev = atoi(val);
 		} else if (key == keys.text_content_length) {
@@ -406,6 +395,9 @@ int svndump_init(const char *filename)
 	if (buffer_init(&input, filename))
 		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
+	strbuf_init(&rev_ctx.log, 4096);
+	strbuf_init(&node_ctx.src, 4096);
+	strbuf_init(&node_ctx.dst, 4096);
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
@@ -415,11 +407,13 @@ int svndump_init(const char *filename)
 
 void svndump_deinit(void)
 {
-	log_reset();
 	fast_export_deinit();
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
+	strbuf_release(&rev_ctx.log);
+	strbuf_release(&node_ctx.src);
+	strbuf_release(&node_ctx.dst);
 	if (buffer_deinit(&input))
 		fprintf(stderr, "Input error\n");
 	if (ferror(stdout))
@@ -428,10 +422,7 @@ void svndump_deinit(void)
 
 void svndump_reset(void)
 {
-	log_reset();
 	fast_export_reset();
 	buffer_reset(&input);
-	reset_dump_ctx(~0);
-	reset_rev_ctx(0);
-	reset_node_ctx(NULL);
+	pool_reset();
 }
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 2/9] vcs-svn: avoid using ls command twice
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
  2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  8:01   ` Jonathan Nieder
  2011-03-19  7:03 ` [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys David Barr
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/repo_tree.c |   24 ++++--------------------
 vcs-svn/repo_tree.h |    3 +--
 vcs-svn/svndump.c   |    3 +--
 3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
index f2466bc..67d27f0 100644
--- a/vcs-svn/repo_tree.c
+++ b/vcs-svn/repo_tree.c
@@ -8,39 +8,23 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 
-const char *repo_read_path(const char *path)
+const char *repo_read_path(const char *path, uint32_t *mode_out)
 {
 	int err;
-	uint32_t dummy;
 	static struct strbuf buf = STRBUF_INIT;
 
 	strbuf_reset(&buf);
-	err = fast_export_ls(path, &dummy, &buf);
+	err = fast_export_ls(path, mode_out, &buf);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
+		/* Treat missing paths as directories. */
+		*mode_out = REPO_MODE_DIR;
 		return NULL;
 	}
 	return buf.buf;
 }
 
-uint32_t repo_read_mode(const char *path)
-{
-	int err;
-	uint32_t result;
-	static struct strbuf dummy = STRBUF_INIT;
-
-	strbuf_reset(&dummy);
-	err = fast_export_ls(path, &result, &dummy);
-	if (err) {
-		if (errno != ENOENT)
-			die_errno("BUG: unexpected fast_export_ls error");
-		/* Treat missing paths as directories. */
-		return REPO_MODE_DIR;
-	}
-	return result;
-}
-
 void repo_copy(uint32_t revision, const char *src, const char *dst)
 {
 	int err;
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index af2415c..eb003e6 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -11,8 +11,7 @@
 uint32_t next_blob_mark(void);
 void repo_copy(uint32_t revision, const char *src, const char *dst);
 void repo_add(const char *path, uint32_t mode, uint32_t blob_mark);
-const char *repo_read_path(const char *path);
-uint32_t repo_read_mode(const char *path);
+const char *repo_read_path(const char *path, uint32_t *mode_out);
 void repo_delete(const char *path);
 void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
 		 uint32_t url, long unsigned timestamp);
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index afdfc63..15b173e 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -236,8 +236,7 @@ static void handle_node(void)
 		old_data = NULL;
 	} else if (node_ctx.action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node_ctx.dst.buf);
-		mode = repo_read_mode(node_ctx.dst.buf);
+		old_data = repo_read_path(node_ctx.dst.buf, &mode);
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 			die("invalid dump: cannot modify a directory into a file");
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
  2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
  2011-03-19  7:03 ` [PATCH 2/9] vcs-svn: avoid using ls command twice David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  8:51   ` Jonathan Nieder
  2011-03-19  7:03 ` [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys David Barr
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |   50 ++++++++++++++++++++++++++++++++------------------
 1 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 15b173e..49fb6db 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -52,8 +52,7 @@ static struct {
 } dump_ctx;
 
 static struct {
-	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
-		revision_number, node_path, node_kind, node_action,
+	uint32_t uuid, revision_number, node_path, node_kind, node_action,
 		node_copyfrom_path, node_copyfrom_rev, text_content_length,
 		prop_content_length, content_length, svn_fs_dump_format_version,
 		/* version 3 format */
@@ -92,11 +91,6 @@ static void reset_dump_ctx(uint32_t url)
 
 static void init_keys(void)
 {
-	keys.svn_log = pool_intern("svn:log");
-	keys.svn_author = pool_intern("svn:author");
-	keys.svn_date = pool_intern("svn:date");
-	keys.svn_executable = pool_intern("svn:executable");
-	keys.svn_special = pool_intern("svn:special");
 	keys.uuid = pool_intern("UUID");
 	keys.revision_number = pool_intern("Revision-number");
 	keys.node_path = pool_intern("Node-path");
@@ -113,22 +107,38 @@ static void init_keys(void)
 	keys.prop_delta = pool_intern("Prop-delta");
 }
 
-static void handle_property(uint32_t key, const char *val, uint32_t len,
+static void handle_property(const char *key, const char *val, uint32_t len,
 				uint32_t *type_set)
 {
-	if (key == keys.svn_log) {
+	const int key_len = strlen(key);
+	switch (key_len) {
+	case 7:
+		if (memcmp(key, "svn:log", 7))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:log");
 		/* Value length excludes terminating nul. */
 		strbuf_add(&rev_ctx.log, val, len + 1);
-	} else if (key == keys.svn_author) {
+		break;
+	case 10:
+		if (memcmp(key, "svn:author", 10))
+			break;
 		rev_ctx.author = pool_intern(val);
-	} else if (key == keys.svn_date) {
+		break;
+	case 8:
+		if (memcmp(key, "svn:date", 8))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:date");
 		if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
 			warning("invalid timestamp: %s", val);
-	} else if (key == keys.svn_executable || key == keys.svn_special) {
+		break;
+	case 14:
+		if (memcmp(key, "svn:executable", 14))
+			break;
+	case 11:
+		if (key_len == 11 && memcmp(key, "svn:special", 11))
+			break;
 		if (*type_set) {
 			if (!val)
 				return;
@@ -139,7 +149,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 			return;
 		}
 		*type_set = 1;
-		node_ctx.type = key == keys.svn_executable ?
+		node_ctx.type = key_len == strlen("svn:executable") ?
 				REPO_MODE_EXE :
 				REPO_MODE_LNK;
 	}
@@ -147,7 +157,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 
 static void read_props(void)
 {
-	uint32_t key = ~0;
+	char key[16] = {0};
 	const char *t;
 	/*
 	 * NEEDSWORK: to support simple mode changes like
@@ -175,16 +185,20 @@ static void read_props(void)
 
 		switch (type) {
 		case 'K':
-			key = pool_intern(val);
-			continue;
 		case 'D':
-			key = pool_intern(val);
+			if (len < sizeof(key))
+				memcpy(key, val, len + 1);
+			else	/* nonstandard key. */
+				*key = '\0';
+			if (type == 'K')
+				continue;
+			assert(type == 'D');
 			val = NULL;
 			len = 0;
 			/* fall through */
 		case 'V':
 			handle_property(key, val, len, &type_set);
-			key = ~0;
+			*key = '\0';
 			continue;
 		default:
 			die("invalid property line: %s\n", t);
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (2 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  8:57   ` Jonathan Nieder
  2011-03-19  7:03 ` [PATCH 5/9] vcs-svn: factor out usage of string_pool David Barr
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |  110 +++++++++++++++++++++++++++++------------------------
 1 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 49fb6db..03f916d 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -51,14 +51,6 @@ static struct {
 	uint32_t version, uuid, url;
 } dump_ctx;
 
-static struct {
-	uint32_t uuid, revision_number, node_path, node_kind, node_action,
-		node_copyfrom_path, node_copyfrom_rev, text_content_length,
-		prop_content_length, content_length, svn_fs_dump_format_version,
-		/* version 3 format */
-		text_delta, prop_delta;
-} keys;
-
 static void reset_node_ctx(char *fname)
 {
 	node_ctx.type = 0;
@@ -89,24 +81,6 @@ static void reset_dump_ctx(uint32_t url)
 	dump_ctx.uuid = ~0;
 }
 
-static void init_keys(void)
-{
-	keys.uuid = pool_intern("UUID");
-	keys.revision_number = pool_intern("Revision-number");
-	keys.node_path = pool_intern("Node-path");
-	keys.node_kind = pool_intern("Node-kind");
-	keys.node_action = pool_intern("Node-action");
-	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
-	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
-	keys.text_content_length = pool_intern("Text-content-length");
-	keys.prop_content_length = pool_intern("Prop-content-length");
-	keys.content_length = pool_intern("Content-length");
-	keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version");
-	/* version 3 format (Subversion 1.1.0) */
-	keys.text_delta = pool_intern("Text-delta");
-	keys.prop_delta = pool_intern("Prop-delta");
-}
-
 static void handle_property(const char *key, const char *val, uint32_t len,
 				uint32_t *type_set)
 {
@@ -314,7 +288,6 @@ void svndump_read(const char *url)
 	char *t;
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
-	uint32_t key;
 
 	reset_dump_ctx(pool_intern(url));
 	while ((t = buffer_read_line(&input))) {
@@ -323,16 +296,25 @@ void svndump_read(const char *url)
 			continue;
 		*val++ = '\0';
 		*val++ = '\0';
-		key = pool_intern(t);
 
-		if (key == keys.svn_fs_dump_format_version) {
+		/* strlen(key) */
+		switch (val - t - 2) { 
+		case 26:
+			if (memcmp(t, "SVN-fs-dump-format-version", 26))
+				continue;
 			dump_ctx.version = atoi(val);
 			if (dump_ctx.version > 3)
 				die("expected svn dump format version <= 3, found %"PRIu32,
 				    dump_ctx.version);
-		} else if (key == keys.uuid) {
+			break;
+		case 4:
+			if (memcmp(t, "UUID", 4))
+				continue;
 			dump_ctx.uuid = pool_intern(val);
-		} else if (key == keys.revision_number) {
+			break;
+		case 15:
+			if (memcmp(t, "Revision-number", 15))
+				continue;
 			if (active_ctx == NODE_CTX)
 				handle_node();
 			if (active_ctx == REV_CTX)
@@ -341,21 +323,31 @@ void svndump_read(const char *url)
 				end_revision();
 			active_ctx = REV_CTX;
 			reset_rev_ctx(atoi(val));
-		} else if (key == keys.node_path) {
-			if (active_ctx == NODE_CTX)
-				handle_node();
-			if (active_ctx == REV_CTX)
-				begin_revision();
-			active_ctx = NODE_CTX;
-			reset_node_ctx(val);
-		} else if (key == keys.node_kind) {
+			break;
+		case 9:
+			if (prefixcmp(t, "Node-"))
+				continue;
+			if (!memcmp(t + strlen("Node-"), "path", 4)) {
+				if (active_ctx == NODE_CTX)
+					handle_node();
+				if (active_ctx == REV_CTX)
+					begin_revision();
+				active_ctx = NODE_CTX;
+				reset_node_ctx(val);
+				break;
+			}
+			if (memcmp(t + strlen("Node-"), "kind", 4))
+				continue;
 			if (!strcmp(val, "dir"))
 				node_ctx.type = REPO_MODE_DIR;
 			else if (!strcmp(val, "file"))
 				node_ctx.type = REPO_MODE_BLB;
 			else
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
-		} else if (key == keys.node_action) {
+			break;
+		case 11:
+			if (memcmp(t, "Node-action", 11))
+				continue;
 			if (!strcmp(val, "delete")) {
 				node_ctx.action = NODEACT_DELETE;
 			} else if (!strcmp(val, "add")) {
@@ -368,20 +360,39 @@ void svndump_read(const char *url)
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-		} else if (key == keys.node_copyfrom_path) {
+			break;
+		case 18:
+			if (memcmp(t, "Node-copyfrom-path", 18))
+				continue;
 			strbuf_reset(&node_ctx.src);
 			strbuf_addstr(&node_ctx.src, val);
-		} else if (key == keys.node_copyfrom_rev) {
+			break;
+		case 17:
+			if (memcmp(t, "Node-copyfrom-rev", 17))
+				continue;
 			node_ctx.srcRev = atoi(val);
-		} else if (key == keys.text_content_length) {
-			node_ctx.textLength = atoi(val);
-		} else if (key == keys.prop_content_length) {
+			break;
+		case 19:
+			if (!memcmp(t, "Text-content-length", 19)) {
+				node_ctx.textLength = atoi(val);
+				break;
+			}
+			if (memcmp(t, "Prop-content-length", 19))
+				continue;
 			node_ctx.propLength = atoi(val);
-		} else if (key == keys.text_delta) {
-			node_ctx.text_delta = !strcmp(val, "true");
-		} else if (key == keys.prop_delta) {
+			break;
+		case 10:
+			if (!memcmp(t, "Text-delta", 10)) {
+				node_ctx.text_delta = !strcmp(val, "true");
+				break;
+			}
+			if (memcmp(t, "Prop-delta", 10))
+				continue;
 			node_ctx.prop_delta = !strcmp(val, "true");
-		} else if (key == keys.content_length) {
+			break;
+		case 14:
+			if (memcmp(t, "Content-length", 14))
+				continue;
 			len = atoi(val);
 			buffer_read_line(&input);
 			if (active_ctx == REV_CTX) {
@@ -414,7 +425,6 @@ int svndump_init(const char *filename)
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
-	init_keys();
 	return 0;
 }
 
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 5/9] vcs-svn: factor out usage of string_pool
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (3 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  9:08   ` Jonathan Nieder
  2011-03-19  7:03 ` [PATCH 6/9] vcs-svn: drop string_pool David Barr
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/fast_export.c |   17 +++++++----------
 vcs-svn/fast_export.h |    5 +++--
 vcs-svn/svndump.c     |   44 ++++++++++++++++++++++++++------------------
 3 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index bb5e9aa..1d50512 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -9,7 +9,6 @@
 #include "fast_export.h"
 #include "line_buffer.h"
 #include "repo_tree.h"
-#include "string_pool.h"
 #include "strbuf.h"
 
 #define MAX_GITSVN_LINE_LEN 4096
@@ -61,25 +60,23 @@ void fast_export_modify(const char *path, uint32_t mode, const char *dataref)
 }
 
 static char gitsvnline[MAX_GITSVN_LINE_LEN];
-void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
-			uint32_t uuid, uint32_t url,
+void fast_export_begin_commit(uint32_t revision, const char *author,
+			const char *log, const char *uuid, const char *url,
 			unsigned long timestamp)
 {
-	if (!log)
-		log = "";
-	if (~uuid && ~url) {
+	if (*uuid && *url) {
 		snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
 				"\n\ngit-svn-id: %s@%"PRIu32" %s\n",
-				 pool_fetch(url), revision, pool_fetch(uuid));
+				 url, revision, uuid);
 	} else {
 		*gitsvnline = '\0';
 	}
 	printf("commit refs/heads/master\n");
 	printf("mark :%"PRIu32"\n", revision);
 	printf("committer %s <%s@%s> %ld +0000\n",
-		   ~author ? pool_fetch(author) : "nobody",
-		   ~author ? pool_fetch(author) : "nobody",
-		   ~uuid ? pool_fetch(uuid) : "local", timestamp);
+		   *author ? author : "nobody",
+		   *author ? author : "nobody",
+		   *uuid ? uuid : "local", timestamp);
 	printf("data %"PRIu32"\n%s%s\n",
 		   (uint32_t) (strlen(log) + strlen(gitsvnline)),
 		   log, gitsvnline);
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index a47c609..bc5bddf 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -10,8 +10,9 @@ void fast_export_reset(void);
 
 void fast_export_delete(const char *path);
 void fast_export_modify(const char *path, uint32_t mode, const char *dataref);
-void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
-			uint32_t uuid, uint32_t url, unsigned long timestamp);
+void fast_export_begin_commit(uint32_t revision, const char *author,
+			const char *log, const char *uuid, const char *url,
+			unsigned long timestamp);
 void fast_export_end_commit(uint32_t revision);
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
 
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 03f916d..fd67db8 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -11,7 +11,6 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 #include "line_buffer.h"
-#include "string_pool.h"
 #include "strbuf.h"
 
 #define REPORT_FILENO 3
@@ -42,13 +41,14 @@ static struct {
 } node_ctx;
 
 static struct {
-	uint32_t revision, author;
+	uint32_t revision;
 	unsigned long timestamp;
-	struct strbuf log;
+	struct strbuf log, author;
 } rev_ctx;
 
 static struct {
-	uint32_t version, uuid, url;
+	uint32_t version;
+	struct strbuf uuid, url;
 } dump_ctx;
 
 static void reset_node_ctx(char *fname)
@@ -71,14 +71,16 @@ static void reset_rev_ctx(uint32_t revision)
 	rev_ctx.revision = revision;
 	rev_ctx.timestamp = 0;
 	strbuf_reset(&rev_ctx.log);
-	rev_ctx.author = ~0;
+	strbuf_reset(&rev_ctx.author);
 }
 
-static void reset_dump_ctx(uint32_t url)
+static void reset_dump_ctx(const char *url)
 {
-	dump_ctx.url = url;
+	strbuf_reset(&dump_ctx.url);
+	if (url)
+		strbuf_addstr(&dump_ctx.url, url);
 	dump_ctx.version = 1;
-	dump_ctx.uuid = ~0;
+	strbuf_reset(&dump_ctx.uuid);
 }
 
 static void handle_property(const char *key, const char *val, uint32_t len,
@@ -91,13 +93,15 @@ static void handle_property(const char *key, const char *val, uint32_t len,
 			break;
 		if (!val)
 			die("invalid dump: unsets svn:log");
-		/* Value length excludes terminating nul. */
-		strbuf_add(&rev_ctx.log, val, len + 1);
+		strbuf_reset(&rev_ctx.log);
+		strbuf_add(&rev_ctx.log, val, len);
 		break;
 	case 10:
 		if (memcmp(key, "svn:author", 10))
 			break;
-		rev_ctx.author = pool_intern(val);
+		strbuf_reset(&rev_ctx.author);
+		if (val)
+			strbuf_add(&rev_ctx.author, val, len);
 		break;
 	case 8:
 		if (memcmp(key, "svn:date", 8))
@@ -272,8 +276,9 @@ static void begin_revision(void)
 {
 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
 		return;
-	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf,
-		dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
+		rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
+		rev_ctx.timestamp);
 }
 
 static void end_revision(void)
@@ -289,7 +294,7 @@ void svndump_read(const char *url)
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
 
-	reset_dump_ctx(pool_intern(url));
+	reset_dump_ctx(url);
 	while ((t = buffer_read_line(&input))) {
 		val = strstr(t, ": ");
 		if (!val)
@@ -310,7 +315,8 @@ void svndump_read(const char *url)
 		case 4:
 			if (memcmp(t, "UUID", 4))
 				continue;
-			dump_ctx.uuid = pool_intern(val);
+			strbuf_reset(&dump_ctx.uuid);
+			strbuf_addstr(&dump_ctx.uuid, val);
 			break;
 		case 15:
 			if (memcmp(t, "Revision-number", 15))
@@ -419,10 +425,13 @@ int svndump_init(const char *filename)
 	if (buffer_init(&input, filename))
 		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
+	strbuf_init(&dump_ctx.uuid, 4096);
+	strbuf_init(&dump_ctx.url, 4096);
 	strbuf_init(&rev_ctx.log, 4096);
+	strbuf_init(&rev_ctx.author, 4096);
 	strbuf_init(&node_ctx.src, 4096);
 	strbuf_init(&node_ctx.dst, 4096);
-	reset_dump_ctx(~0);
+	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
 	return 0;
@@ -431,7 +440,7 @@ int svndump_init(const char *filename)
 void svndump_deinit(void)
 {
 	fast_export_deinit();
-	reset_dump_ctx(~0);
+	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
 	strbuf_release(&rev_ctx.log);
@@ -447,5 +456,4 @@ void svndump_reset(void)
 {
 	fast_export_reset();
 	buffer_reset(&input);
-	pool_reset();
 }
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 6/9] vcs-svn: drop string_pool
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (4 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 5/9] vcs-svn: factor out usage of string_pool David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  7:03 ` [PATCH 7/9] vcs-svn: drop trp.h David Barr
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore              |    1 -
 Makefile                |   12 ++---
 t/t0080-vcs-svn.sh      |   16 -------
 test-string-pool.c      |   31 -------------
 vcs-svn/string_pool.c   |  113 -----------------------------------------------
 vcs-svn/string_pool.h   |   12 -----
 vcs-svn/string_pool.txt |   43 ------------------
 7 files changed, 4 insertions(+), 224 deletions(-)
 delete mode 100644 test-string-pool.c
 delete mode 100644 vcs-svn/string_pool.c
 delete mode 100644 vcs-svn/string_pool.h
 delete mode 100644 vcs-svn/string_pool.txt

diff --git a/.gitignore b/.gitignore
index c460c66..215e842 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,7 +177,6 @@
 /test-run-command
 /test-sha1
 /test-sigchain
-/test-string-pool
 /test-subprocess
 /test-svn-fe
 /test-treap
diff --git a/Makefile b/Makefile
index ade7923..f8182e5 100644
--- a/Makefile
+++ b/Makefile
@@ -430,7 +430,6 @@ TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
-TEST_PROGRAMS_NEED_X += test-string-pool
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
 TEST_PROGRAMS_NEED_X += test-treap
@@ -1838,10 +1837,9 @@ ifndef NO_CURL
 endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
-VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
-	vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
-VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \
-	test-line-buffer.o test-treap.o
+VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
+	vcs-svn/fast_export.o vcs-svn/svndump.o
+VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o test-treap.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1965,7 +1963,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
+	vcs-svn/obj_pool.h vcs-svn/trp.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
@@ -2133,8 +2131,6 @@ test-line-buffer$X: vcs-svn/lib.a
 
 test-parse-options$X: parse-options.o
 
-test-string-pool$X: vcs-svn/lib.a
-
 test-svn-fe$X: vcs-svn/lib.a
 
 .PRECIOUS: $(TEST_OBJS)
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
index 99a314b..ce02c58 100755
--- a/t/t0080-vcs-svn.sh
+++ b/t/t0080-vcs-svn.sh
@@ -76,22 +76,6 @@ test_expect_success 'obj pool: high-water mark' '
 	test_cmp expected actual
 '
 
-test_expect_success 'string pool' '
-	echo a does not equal b >expected.differ &&
-	echo a equals a >expected.match &&
-	echo equals equals equals >expected.matchmore &&
-
-	test-string-pool "a,--b" >actual.differ &&
-	test-string-pool "a,a" >actual.match &&
-	test-string-pool "equals-equals" >actual.matchmore &&
-	test_must_fail test-string-pool a,a,a &&
-	test_must_fail test-string-pool a &&
-
-	test_cmp expected.differ actual.differ &&
-	test_cmp expected.match actual.match &&
-	test_cmp expected.matchmore actual.matchmore
-'
-
 test_expect_success 'treap sort' '
 	cat <<-\EOF >unsorted &&
 	68
diff --git a/test-string-pool.c b/test-string-pool.c
deleted file mode 100644
index c5782e6..0000000
--- a/test-string-pool.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * test-string-pool.c: code to exercise the svn importer's string pool
- */
-
-#include "git-compat-util.h"
-#include "vcs-svn/string_pool.h"
-
-int main(int argc, char *argv[])
-{
-	const uint32_t unequal = pool_intern("does not equal");
-	const uint32_t equal = pool_intern("equals");
-	uint32_t buf[3];
-	uint32_t n;
-
-	if (argc != 2)
-		usage("test-string-pool <string>,<string>");
-
-	n = pool_tok_seq(3, buf, ",-", argv[1]);
-	if (n >= 3)
-		die("too many strings");
-	if (n <= 1)
-		die("too few strings");
-
-	buf[2] = buf[1];
-	buf[1] = (buf[0] == buf[2]) ? equal : unequal;
-	pool_print_seq(3, buf, ' ', stdout);
-	fputc('\n', stdout);
-
-	pool_reset();
-	return 0;
-}
diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
deleted file mode 100644
index be43598..0000000
--- a/vcs-svn/string_pool.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#include "git-compat-util.h"
-#include "quote.h"
-#include "trp.h"
-#include "obj_pool.h"
-#include "string_pool.h"
-
-static struct trp_root tree = { ~0 };
-
-struct node {
-	uint32_t offset;
-	struct trp_node children;
-};
-
-/* Two memory pools: one for struct node, and another for strings */
-obj_pool_gen(node, struct node, 4096)
-obj_pool_gen(string, char, 4096)
-
-static char *node_value(struct node *node)
-{
-	return node ? string_pointer(node->offset) : NULL;
-}
-
-static int node_cmp(struct node *a, struct node *b)
-{
-	return strcmp(node_value(a), node_value(b));
-}
-
-/* Build a Treap from the node structure (a trp_node w/ offset) */
-trp_gen(static, tree_, struct node, children, node, node_cmp);
-
-const char *pool_fetch(uint32_t entry)
-{
-	return node_value(node_pointer(entry));
-}
-
-uint32_t pool_intern(const char *key)
-{
-	/* Canonicalize key */
-	struct node *match = NULL, *node;
-	uint32_t key_len;
-	if (key == NULL)
-		return ~0;
-	key_len = strlen(key) + 1;
-	node = node_pointer(node_alloc(1));
-	node->offset = string_alloc(key_len);
-	strcpy(node_value(node), key);
-	match = tree_search(&tree, node);
-	if (!match) {
-		tree_insert(&tree, node);
-	} else {
-		node_free(1);
-		string_free(key_len);
-		node = match;
-	}
-	return node_offset(node);
-}
-
-uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
-{
-	char *token = strtok_r(str, delim, saveptr);
-	return token ? pool_intern(token) : ~0;
-}
-
-void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream)
-{
-	uint32_t i;
-	for (i = 0; i < len && ~seq[i]; i++) {
-		fputs(pool_fetch(seq[i]), stream);
-		if (i < len - 1 && ~seq[i + 1])
-			fputc(delim, stream);
-	}
-}
-
-void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream)
-{
-	uint32_t i;
-	for (i = 0; i < len && ~seq[i]; i++) {
-		quote_c_style(pool_fetch(seq[i]), NULL, stream, 1);
-		if (i < len - 1 && ~seq[i + 1])
-			fputc(delim, stream);
-	}
-}
-
-uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
-{
-	char *context = NULL;
-	uint32_t token = ~0;
-	uint32_t length;
-
-	if (sz == 0)
-		return ~0;
-	if (str)
-		token = pool_tok_r(str, delim, &context);
-	for (length = 0; length < sz; length++) {
-		seq[length] = token;
-		if (token == ~0)
-			return length;
-		token = pool_tok_r(NULL, delim, &context);
-	}
-	seq[sz - 1] = ~0;
-	return sz;
-}
-
-void pool_reset(void)
-{
-	node_reset();
-	string_reset();
-}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
deleted file mode 100644
index 96e501d..0000000
--- a/vcs-svn/string_pool.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef STRING_POOL_H_
-#define STRING_POOL_H_
-
-uint32_t pool_intern(const char *key);
-const char *pool_fetch(uint32_t entry);
-uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
-void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream);
-void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream);
-uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
-void pool_reset(void);
-
-#endif
diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt
deleted file mode 100644
index 1b41f15..0000000
--- a/vcs-svn/string_pool.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-string_pool API
-===============
-
-The string_pool API provides facilities for replacing strings
-with integer keys that can be more easily compared and stored.
-The facilities are designed so that one could teach Git without
-too much trouble to store the information needed for these keys to
-remain valid over multiple executions.
-
-Functions
----------
-
-pool_intern::
-	Include a string in the string pool and get its key.
-	If that string is already in the pool, retrieves its
-	existing key.
-
-pool_fetch::
-	Retrieve the string associated to a given key.
-
-pool_tok_r::
-	Extract the key of the next token from a string.
-	Interface mimics strtok_r.
-
-pool_print_seq::
-	Print a sequence of strings named by key to a file, using the
-	specified delimiter to separate them.
-
-	If NULL (key ~0) appears in the sequence, the sequence ends
-	early.
-
-pool_tok_seq::
-	Split a string into tokens, storing the keys of segments
-	into a caller-provided array.
-
-	Unless sz is 0, the array will always be ~0-terminated.
-	If there is not enough room for all the tokens, the
-	array holds as many tokens as fit in the entries before
-	the terminating ~0.  Return value is the index after the
-	last token, or sz if the tokens did not fit.
-
-pool_reset::
-	Deallocate storage for the string pool.
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 7/9] vcs-svn: drop trp.h
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (5 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 6/9] vcs-svn: drop string_pool David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  7:03 ` [PATCH 8/9] vcs-svn: drop obj_pool.h David Barr
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore         |    1 -
 Makefile           |    3 +-
 t/t0080-vcs-svn.sh |   22 -----
 test-treap.c       |   70 ---------------
 vcs-svn/LICENSE    |    3 -
 vcs-svn/trp.h      |  237 ----------------------------------------------------
 vcs-svn/trp.txt    |  109 ------------------------
 7 files changed, 1 insertions(+), 444 deletions(-)
 delete mode 100644 test-treap.c
 delete mode 100644 vcs-svn/trp.h
 delete mode 100644 vcs-svn/trp.txt

diff --git a/.gitignore b/.gitignore
index 215e842..aa94ff1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,7 +179,6 @@
 /test-sigchain
 /test-subprocess
 /test-svn-fe
-/test-treap
 /common-cmds.h
 *.tar.gz
 *.dsc
diff --git a/Makefile b/Makefile
index f8182e5..2d56ab9 100644
--- a/Makefile
+++ b/Makefile
@@ -432,7 +432,6 @@ TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
-TEST_PROGRAMS_NEED_X += test-treap
 TEST_PROGRAMS_NEED_X += test-index-version
 TEST_PROGRAMS_NEED_X += test-mktemp
 
@@ -1963,7 +1962,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h vcs-svn/trp.h \
+	vcs-svn/obj_pool.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
index ce02c58..3f29496 100755
--- a/t/t0080-vcs-svn.sh
+++ b/t/t0080-vcs-svn.sh
@@ -76,26 +76,4 @@ test_expect_success 'obj pool: high-water mark' '
 	test_cmp expected actual
 '
 
-test_expect_success 'treap sort' '
-	cat <<-\EOF >unsorted &&
-	68
-	12
-	13
-	13
-	68
-	13
-	13
-	21
-	10
-	11
-	12
-	13
-	13
-	EOF
-	sort unsorted >expected &&
-
-	test-treap <unsorted >actual &&
-	test_cmp expected actual
-'
-
 test_done
diff --git a/test-treap.c b/test-treap.c
deleted file mode 100644
index ab8c951..0000000
--- a/test-treap.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * test-treap.c: code to exercise the svn importer's treap structure
- */
-
-#include "cache.h"
-#include "vcs-svn/obj_pool.h"
-#include "vcs-svn/trp.h"
-
-struct int_node {
-	uintmax_t n;
-	struct trp_node children;
-};
-
-obj_pool_gen(node, struct int_node, 3)
-
-static int node_cmp(struct int_node *a, struct int_node *b)
-{
-	return (a->n > b->n) - (a->n < b->n);
-}
-
-trp_gen(static, treap_, struct int_node, children, node, node_cmp)
-
-static void strtonode(struct int_node *item, const char *s)
-{
-	char *end;
-	item->n = strtoumax(s, &end, 10);
-	if (*s == '\0' || (*end != '\n' && *end != '\0'))
-		die("invalid integer: %s", s);
-}
-
-int main(int argc, char *argv[])
-{
-	struct strbuf sb = STRBUF_INIT;
-	struct trp_root root = { ~0 };
-	uint32_t item;
-
-	if (argc != 1)
-		usage("test-treap < ints");
-
-	while (strbuf_getline(&sb, stdin, '\n') != EOF) {
-		struct int_node *node = node_pointer(node_alloc(1));
-
-		item = node_offset(node);
-		strtonode(node, sb.buf);
-		node = treap_insert(&root, node_pointer(item));
-		if (node_offset(node) != item)
-			die("inserted %"PRIu32" in place of %"PRIu32"",
-				node_offset(node), item);
-	}
-
-	item = node_offset(treap_first(&root));
-	while (~item) {
-		uint32_t next;
-		struct int_node *tmp = node_pointer(node_alloc(1));
-
-		tmp->n = node_pointer(item)->n;
-		next = node_offset(treap_next(&root, node_pointer(item)));
-
-		treap_remove(&root, node_pointer(item));
-		item = node_offset(treap_nsearch(&root, tmp));
-
-		if (item != next && (!~item || node_pointer(item)->n != tmp->n))
-			die("found %"PRIuMAX" in place of %"PRIuMAX"",
-				~item ? node_pointer(item)->n : ~(uintmax_t) 0,
-				~next ? node_pointer(next)->n : ~(uintmax_t) 0);
-		printf("%"PRIuMAX"\n", tmp->n);
-	}
-	node_reset();
-	return 0;
-}
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE
index 0a5e3c4..533f585 100644
--- a/vcs-svn/LICENSE
+++ b/vcs-svn/LICENSE
@@ -1,9 +1,6 @@
 Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
 All rights reserved.
 
-Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
-All rights reserved.
-
 Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH,
 Frankfurt/Main, Germany
 and others, see http://svn2cc.sarovar.org
diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
deleted file mode 100644
index c32b918..0000000
--- a/vcs-svn/trp.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * C macro implementation of treaps.
- *
- * Usage:
- *   #include <stdint.h>
- *   #include "trp.h"
- *   trp_gen(...)
- *
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#ifndef TRP_H_
-#define TRP_H_
-
-#define MAYBE_UNUSED __attribute__((__unused__))
-
-/* Node structure. */
-struct trp_node {
-	uint32_t trpn_left;
-	uint32_t trpn_right;
-};
-
-/* Root structure. */
-struct trp_root {
-	uint32_t trp_root;
-};
-
-/* Pointer/Offset conversion. */
-#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
-#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
-#define trpn_modify(a_base, a_offset) \
-	do { \
-		if ((a_offset) < a_base##_pool.committed) { \
-			uint32_t old_offset = (a_offset);\
-			(a_offset) = a_base##_alloc(1); \
-			*trpn_pointer(a_base, a_offset) = \
-				*trpn_pointer(a_base, old_offset); \
-		} \
-	} while (0)
-
-/* Left accessors. */
-#define trp_left_get(a_base, a_field, a_node) \
-	(trpn_pointer(a_base, a_node)->a_field.trpn_left)
-#define trp_left_set(a_base, a_field, a_node, a_left) \
-	do { \
-		trpn_modify(a_base, a_node); \
-		trp_left_get(a_base, a_field, a_node) = (a_left); \
-	} while (0)
-
-/* Right accessors. */
-#define trp_right_get(a_base, a_field, a_node) \
-	(trpn_pointer(a_base, a_node)->a_field.trpn_right)
-#define trp_right_set(a_base, a_field, a_node, a_right) \
-	do { \
-		trpn_modify(a_base, a_node); \
-		trp_right_get(a_base, a_field, a_node) = (a_right); \
-	} while (0)
-
-/*
- * Fibonacci hash function.
- * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
- * See Knuth §6.4: volume 3, 3rd ed, p518.
- */
-#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node))
-
-/* Priority accessors. */
-#define trp_prio_get(a_node) trpn_hash(a_node)
-
-/* Node initializer. */
-#define trp_node_new(a_base, a_field, a_node) \
-	do { \
-		trp_left_set(a_base, a_field, (a_node), ~0); \
-		trp_right_set(a_base, a_field, (a_node), ~0); \
-	} while (0)
-
-/* Internal utility macros. */
-#define trpn_first(a_base, a_field, a_root, r_node) \
-	do { \
-		(r_node) = (a_root); \
-		if ((r_node) == ~0) \
-			return NULL; \
-		while (~trp_left_get(a_base, a_field, (r_node))) \
-			(r_node) = trp_left_get(a_base, a_field, (r_node)); \
-	} while (0)
-
-#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
-	do { \
-		(r_node) = trp_right_get(a_base, a_field, (a_node)); \
-		trp_right_set(a_base, a_field, (a_node), \
-			trp_left_get(a_base, a_field, (r_node))); \
-		trp_left_set(a_base, a_field, (r_node), (a_node)); \
-	} while (0)
-
-#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
-	do { \
-		(r_node) = trp_left_get(a_base, a_field, (a_node)); \
-		trp_left_set(a_base, a_field, (a_node), \
-			trp_right_get(a_base, a_field, (r_node))); \
-		trp_right_set(a_base, a_field, (r_node), (a_node)); \
-	} while (0)
-
-#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
-a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
-{ \
-	uint32_t ret; \
-	trpn_first(a_base, a_field, treap->trp_root, ret); \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
-{ \
-	uint32_t ret; \
-	uint32_t offset = trpn_offset(a_base, node); \
-	if (~trp_right_get(a_base, a_field, offset)) { \
-		trpn_first(a_base, a_field, \
-			trp_right_get(a_base, a_field, offset), ret); \
-	} else { \
-		uint32_t tnode = treap->trp_root; \
-		ret = ~0; \
-		while (1) { \
-			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
-				trpn_pointer(a_base, tnode)); \
-			if (cmp < 0) { \
-				ret = tnode; \
-				tnode = trp_left_get(a_base, a_field, tnode); \
-			} else if (cmp > 0) { \
-				tnode = trp_right_get(a_base, a_field, tnode); \
-			} else { \
-				break; \
-			} \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
-{ \
-	int cmp; \
-	uint32_t ret = treap->trp_root; \
-	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
-		if (cmp < 0) { \
-			ret = trp_left_get(a_base, a_field, ret); \
-		} else { \
-			ret = trp_right_get(a_base, a_field, ret); \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
-{ \
-	int cmp; \
-	uint32_t ret = treap->trp_root; \
-	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
-		if (cmp < 0) { \
-			if (!~trp_left_get(a_base, a_field, ret)) \
-				break; \
-			ret = trp_left_get(a_base, a_field, ret); \
-		} else { \
-			ret = trp_right_get(a_base, a_field, ret); \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
-{ \
-	if (cur_node == ~0) { \
-		return ins_node; \
-	} else { \
-		uint32_t ret; \
-		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
-					trpn_pointer(a_base, cur_node)); \
-		if (cmp < 0) { \
-			uint32_t left = a_pre##insert_recurse( \
-				trp_left_get(a_base, a_field, cur_node), ins_node); \
-			trp_left_set(a_base, a_field, cur_node, left); \
-			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
-				trpn_rotate_right(a_base, a_field, cur_node, ret); \
-			else \
-				ret = cur_node; \
-		} else { \
-			uint32_t right = a_pre##insert_recurse( \
-				trp_right_get(a_base, a_field, cur_node), ins_node); \
-			trp_right_set(a_base, a_field, cur_node, right); \
-			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
-				trpn_rotate_left(a_base, a_field, cur_node, ret); \
-			else \
-				ret = cur_node; \
-		} \
-		return ret; \
-	} \
-} \
-a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
-{ \
-	uint32_t offset = trpn_offset(a_base, node); \
-	trp_node_new(a_base, a_field, offset); \
-	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
-	return trpn_pointer(a_base, offset); \
-} \
-a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
-{ \
-	int cmp = a_cmp(trpn_pointer(a_base, rem_node), \
-			trpn_pointer(a_base, cur_node)); \
-	if (cmp == 0) { \
-		uint32_t ret; \
-		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
-		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
-		if (left == ~0) { \
-			if (right == ~0) \
-				return ~0; \
-		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
-			trpn_rotate_right(a_base, a_field, cur_node, ret); \
-			right = a_pre##remove_recurse(cur_node, rem_node); \
-			trp_right_set(a_base, a_field, ret, right); \
-			return ret; \
-		} \
-		trpn_rotate_left(a_base, a_field, cur_node, ret); \
-		left = a_pre##remove_recurse(cur_node, rem_node); \
-		trp_left_set(a_base, a_field, ret, left); \
-		return ret; \
-	} else if (cmp < 0) { \
-		uint32_t left = a_pre##remove_recurse( \
-			trp_left_get(a_base, a_field, cur_node), rem_node); \
-		trp_left_set(a_base, a_field, cur_node, left); \
-		return cur_node; \
-	} else { \
-		uint32_t right = a_pre##remove_recurse( \
-			trp_right_get(a_base, a_field, cur_node), rem_node); \
-		trp_right_set(a_base, a_field, cur_node, right); \
-		return cur_node; \
-	} \
-} \
-a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
-{ \
-	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
-		trpn_offset(a_base, node)); \
-} \
-
-#endif
diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt
deleted file mode 100644
index 5ca6b42..0000000
--- a/vcs-svn/trp.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-Motivation
-==========
-
-Treaps provide a memory-efficient binary search tree structure.
-Insertion/deletion/search are about as about as fast in the average
-case as red-black trees and the chances of worst-case behavior are
-vanishingly small, thanks to (pseudo-)randomness.  The bad worst-case
-behavior is a small price to pay, given that treaps are much simpler
-to implement.
-
-API
-===
-
-The trp API generates a data structure and functions to handle a
-large growing set of objects stored in a pool.
-
-The caller:
-
-. Specifies parameters for the generated functions with the
-  trp_gen(static, foo_, ...) macro.
-
-. Allocates a `struct trp_root` variable and sets it to {~0}.
-
-. Adds new nodes to the set using `foo_insert`.  Any pointers
-  to existing nodes cannot be relied upon any more, so the caller
-  might retrieve them anew with `foo_pointer`.
-
-. Can find a specific item in the set using `foo_search`.
-
-. Can iterate over items in the set using `foo_first` and `foo_next`.
-
-. Can remove an item from the set using `foo_remove`.
-
-Example:
-
-----
-struct ex_node {
-	const char *s;
-	struct trp_node ex_link;
-};
-static struct trp_root ex_base = {~0};
-obj_pool_gen(ex, struct ex_node, 4096);
-trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp)
-struct ex_node *item;
-
-item = ex_pointer(ex_alloc(1));
-item->s = "hello";
-ex_insert(&ex_base, item);
-item = ex_pointer(ex_alloc(1));
-item->s = "goodbye";
-ex_insert(&ex_base, item);
-for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
-	printf("%s\n", item->s);
-----
-
-Functions
----------
-
-trp_gen(attr, foo_, node_type, link_field, pool, cmp)::
-
-	Generate a type-specific treap implementation.
-+
-. The storage class for generated functions will be 'attr' (e.g., `static`).
-. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
-. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
-  This type must be a struct with at least one `struct trp_node` field
-  to point to its children.
-. The field used to access child nodes will be 'link_field'.
-. All treap nodes must lie in the 'pool' object pool.
-. Treap nodes must be totally ordered by the 'cmp' relation, with the
-  following prototype:
-+
-int (*cmp)(node_type \*a, node_type \*b)
-+
-and returning a value less than, equal to, or greater than zero
-according to the result of comparison.
-
-node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node)::
-
-	Insert node into treap.  If inserted multiple times,
-	a node will appear in the treap multiple times.
-+
-The return value is the address of the node within the treap,
-which might differ from `node` if `pool_alloc` had to call
-`realloc` to expand the pool.
-
-void foo_remove(struct trp_root *treap, node_type \*node)::
-
-	Remove node from treap.  Caller must ensure node is
-	present in treap before using this function.
-
-node_type *foo_search(struct trp_root \*treap, node_type \*key)::
-
-	Search for a node that matches key.  If no match is found,
-	result is NULL.
-
-node_type *foo_nsearch(struct trp_root \*treap, node_type \*key)::
-
-	Like `foo_search`, but if if the key is missing return what
-	would be key's successor, were key in treap (NULL if no
-	successor).
-
-node_type *foo_first(struct trp_root \*treap)::
-
-	Find the first item from the treap, in sorted order.
-
-node_type *foo_next(struct trp_root \*treap, node_type \*node)::
-
-	Find the next item.
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 8/9] vcs-svn: drop obj_pool.h
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (6 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 7/9] vcs-svn: drop trp.h David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  7:03 ` [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter David Barr
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore         |    1 -
 Makefile           |    2 -
 t/t0080-vcs-svn.sh |   79 -----------------------------------
 test-obj-pool.c    |  116 ----------------------------------------------------
 vcs-svn/obj_pool.h |   61 ---------------------------
 5 files changed, 0 insertions(+), 259 deletions(-)
 delete mode 100755 t/t0080-vcs-svn.sh
 delete mode 100644 test-obj-pool.c
 delete mode 100644 vcs-svn/obj_pool.h

diff --git a/.gitignore b/.gitignore
index aa94ff1..789f922 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,7 +171,6 @@
 /test-line-buffer
 /test-match-trees
 /test-mktemp
-/test-obj-pool
 /test-parse-options
 /test-path-utils
 /test-run-command
diff --git a/Makefile b/Makefile
index 2d56ab9..6165609 100644
--- a/Makefile
+++ b/Makefile
@@ -424,7 +424,6 @@ TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
-TEST_PROGRAMS_NEED_X += test-obj-pool
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
@@ -1962,7 +1961,6 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
deleted file mode 100755
index 3f29496..0000000
--- a/t/t0080-vcs-svn.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/sh
-
-test_description='check infrastructure for svn importer'
-
-. ./test-lib.sh
-uint32_max=4294967295
-
-test_expect_success 'obj pool: store data' '
-	cat <<-\EOF >expected &&
-	0
-	1
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 16
-	set one 13
-	test one 13
-	reset one
-	EOF
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: NULL is offset ~0' '
-	echo "$uint32_max" >expected &&
-	echo null one | test-obj-pool >actual &&
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: out-of-bounds access' '
-	cat <<-EOF >expected &&
-	0
-	0
-	$uint32_max
-	$uint32_max
-	16
-	20
-	$uint32_max
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 16
-	alloc two 16
-	offset one 20
-	offset two 20
-	alloc one 5
-	offset one 20
-	free one 1
-	offset one 20
-	reset one
-	reset two
-	EOF
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: high-water mark' '
-	cat <<-\EOF >expected &&
-	0
-	0
-	10
-	20
-	20
-	20
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 10
-	committed one
-	alloc one 10
-	commit one
-	committed one
-	alloc one 10
-	free one 20
-	committed one
-	reset one
-	EOF
-	test_cmp expected actual
-'
-
-test_done
diff --git a/test-obj-pool.c b/test-obj-pool.c
deleted file mode 100644
index 5018863..0000000
--- a/test-obj-pool.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * test-obj-pool.c: code to exercise the svn importer's object pool
- */
-
-#include "cache.h"
-#include "vcs-svn/obj_pool.h"
-
-enum pool { POOL_ONE, POOL_TWO };
-obj_pool_gen(one, int, 1)
-obj_pool_gen(two, int, 4096)
-
-static uint32_t strtouint32(const char *s)
-{
-	char *end;
-	uintmax_t n = strtoumax(s, &end, 10);
-	if (*s == '\0' || (*end != '\n' && *end != '\0'))
-		die("invalid offset: %s", s);
-	return (uint32_t) n;
-}
-
-static void handle_command(const char *command, enum pool pool, const char *arg)
-{
-	switch (*command) {
-	case 'a':
-		if (!prefixcmp(command, "alloc ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_alloc(n) : two_alloc(n));
-			return;
-		}
-	case 'c':
-		if (!prefixcmp(command, "commit ")) {
-			pool == POOL_ONE ? one_commit() : two_commit();
-			return;
-		}
-		if (!prefixcmp(command, "committed ")) {
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_pool.committed : two_pool.committed);
-			return;
-		}
-	case 'f':
-		if (!prefixcmp(command, "free ")) {
-			uint32_t n = strtouint32(arg);
-			pool == POOL_ONE ? one_free(n) : two_free(n);
-			return;
-		}
-	case 'n':
-		if (!prefixcmp(command, "null ")) {
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_offset(NULL) : two_offset(NULL));
-			return;
-		}
-	case 'o':
-		if (!prefixcmp(command, "offset ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_offset(one_pointer(n)) :
-				two_offset(two_pointer(n)));
-			return;
-		}
-	case 'r':
-		if (!prefixcmp(command, "reset ")) {
-			pool == POOL_ONE ? one_reset() : two_reset();
-			return;
-		}
-	case 's':
-		if (!prefixcmp(command, "set ")) {
-			uint32_t n = strtouint32(arg);
-			if (pool == POOL_ONE)
-				*one_pointer(n) = 1;
-			else
-				*two_pointer(n) = 1;
-			return;
-		}
-	case 't':
-		if (!prefixcmp(command, "test ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%d\n", pool == POOL_ONE ?
-				*one_pointer(n) : *two_pointer(n));
-			return;
-		}
-	default:
-		die("unrecognized command: %s", command);
-	}
-}
-
-static void handle_line(const char *line)
-{
-	const char *arg = strchr(line, ' ');
-	enum pool pool;
-
-	if (arg && !prefixcmp(arg + 1, "one"))
-		pool = POOL_ONE;
-	else if (arg && !prefixcmp(arg + 1, "two"))
-		pool = POOL_TWO;
-	else
-		die("no pool specified: %s", line);
-
-	handle_command(line, pool, arg + strlen("one "));
-}
-
-int main(int argc, char *argv[])
-{
-	struct strbuf sb = STRBUF_INIT;
-	if (argc != 1)
-		usage("test-obj-str < script");
-
-	while (strbuf_getline(&sb, stdin, '\n') != EOF)
-		handle_line(sb.buf);
-	strbuf_release(&sb);
-	return 0;
-}
diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h
deleted file mode 100644
index deb6eb8..0000000
--- a/vcs-svn/obj_pool.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#ifndef OBJ_POOL_H_
-#define OBJ_POOL_H_
-
-#include "git-compat-util.h"
-
-#define MAYBE_UNUSED __attribute__((__unused__))
-
-#define obj_pool_gen(pre, obj_t, initial_capacity) \
-static struct { \
-	uint32_t committed; \
-	uint32_t size; \
-	uint32_t capacity; \
-	obj_t *base; \
-} pre##_pool = {0, 0, 0, NULL}; \
-static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \
-{ \
-	uint32_t offset; \
-	if (pre##_pool.size + count > pre##_pool.capacity) { \
-		while (pre##_pool.size + count > pre##_pool.capacity) \
-			if (pre##_pool.capacity) \
-				pre##_pool.capacity *= 2; \
-			else \
-				pre##_pool.capacity = initial_capacity; \
-		pre##_pool.base = realloc(pre##_pool.base, \
-					pre##_pool.capacity * sizeof(obj_t)); \
-	} \
-	offset = pre##_pool.size; \
-	pre##_pool.size += count; \
-	return offset; \
-} \
-static MAYBE_UNUSED void pre##_free(uint32_t count) \
-{ \
-	pre##_pool.size -= count; \
-} \
-static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \
-{ \
-	return obj == NULL ? ~0 : obj - pre##_pool.base; \
-} \
-static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \
-{ \
-	return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \
-} \
-static MAYBE_UNUSED void pre##_commit(void) \
-{ \
-	pre##_pool.committed = pre##_pool.size; \
-} \
-static MAYBE_UNUSED void pre##_reset(void) \
-{ \
-	free(pre##_pool.base); \
-	pre##_pool.base = NULL; \
-	pre##_pool.size = 0; \
-	pre##_pool.capacity = 0; \
-	pre##_pool.committed = 0; \
-}
-
-#endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (7 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 8/9] vcs-svn: drop obj_pool.h David Barr
@ 2011-03-19  7:03 ` David Barr
  2011-03-19  9:10   ` Jonathan Nieder
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
  10 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:03 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This is a small optimisation (4% reduction in user time) but is the largest
artifact within the parsing portion of svndump.c.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index fd67db8..7bc2d3d 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -296,10 +296,12 @@ void svndump_read(const char *url)
 
 	reset_dump_ctx(url);
 	while ((t = buffer_read_line(&input))) {
-		val = strstr(t, ": ");
+		val = strchr(t, ':');
 		if (!val)
 			continue;
 		*val++ = '\0';
+		if (*val != ' ')
+			continue;
 		*val++ = '\0';
 
 		/* strlen(key) */
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* vcs-svn: integrate support for text deltas
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (8 preceding siblings ...)
  2011-03-19  7:03 ` [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter David Barr
@ 2011-03-19  7:20 ` David Barr
  2011-03-19  7:20   ` [PATCH 01/16] vcs-svn: improve support for reading large files David Barr
                     ` (16 more replies)
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
  10 siblings, 17 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

This series is largely the work of Jonathan Nieder.
I have painstakingly ported it from the old development branch to the
latest series to hit the list. As previously, I have tested against
the ASF subversion repository to increase confidence in the series.
Hopefully, this brings us a little closer to having full support for
version 3 of the subversion dump format in master.


 Makefile                  |    5 +-
 contrib/svn-fe/svn-fe.txt |    5 +-
 t/t9010-svn-fe.sh         |  108 ++++++++++++++++-
 t/t9011-svn-da.sh         |  250 ++++++++++++++++++++++++++++++++++++
 test-svn-fe.c             |   42 +++++--
 vcs-svn/LICENSE           |    2 +
 vcs-svn/fast_export.c     |  122 +++++++++++++++++-
 vcs-svn/fast_export.h     |    3 +
 vcs-svn/line_buffer.c     |   37 +++---
 vcs-svn/line_buffer.h     |    7 +-
 vcs-svn/line_buffer.txt   |    3 +-
 vcs-svn/sliding_window.c  |   74 +++++++++++
 vcs-svn/sliding_window.h  |   17 +++
 vcs-svn/svndiff.c         |  308 +++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.h         |   10 ++
 vcs-svn/svndump.c         |   35 ++++-
 16 files changed, 980 insertions(+), 48 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH 01/16] vcs-svn: improve support for reading large files
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 02/16] vcs-svn: make buffer_skip_bytes return length read David Barr
                     ` (15 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Move from uint32_t to off_t as the fundamental unit of length used by
the line_buffer library.  Performance would get worse, if anything, but
I think it's worth it for support of deltas that need to skip large
pieces (> 4 GiB).

Exception: buffer_read_string still takes a uint32_t, since it keeps
its result in an in-core obj_pool.

Callers still have to be updated to take advantage of this.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/line_buffer.c |    8 ++++----
 vcs-svn/line_buffer.h |    4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index eb8a6a7..6930aac 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -104,10 +104,10 @@ void buffer_read_binary(struct line_buffer *buf,
 	strbuf_fread(sb, size, buf->infile);
 }
 
-void buffer_copy_bytes(struct line_buffer *buf, uint32_t len)
+void buffer_copy_bytes(struct line_buffer *buf, off_t len)
 {
 	char byte_buffer[COPY_BUFFER_LEN];
-	uint32_t in;
+	off_t in;
 	while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) {
 		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
 		in = fread(byte_buffer, 1, in, buf->infile);
@@ -120,10 +120,10 @@ void buffer_copy_bytes(struct line_buffer *buf, uint32_t len)
 	}
 }
 
-void buffer_skip_bytes(struct line_buffer *buf, uint32_t len)
+void buffer_skip_bytes(struct line_buffer *buf, off_t len)
 {
 	char byte_buffer[COPY_BUFFER_LEN];
-	uint32_t in;
+	off_t in;
 	while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) {
 		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
 		in = fread(byte_buffer, 1, in, buf->infile);
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index 3c9629e..a090dd6 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -26,7 +26,7 @@ char *buffer_read_line(struct line_buffer *buf);
 char *buffer_read_string(struct line_buffer *buf, uint32_t len);
 int buffer_read_char(struct line_buffer *buf);
 void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
-void buffer_copy_bytes(struct line_buffer *buf, uint32_t len);
-void buffer_skip_bytes(struct line_buffer *buf, uint32_t len);
+void buffer_copy_bytes(struct line_buffer *buf, off_t len);
+void buffer_skip_bytes(struct line_buffer *buf, off_t len);
 
 #endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 02/16] vcs-svn: make buffer_skip_bytes return length read
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
  2011-03-19  7:20   ` [PATCH 01/16] vcs-svn: improve support for reading large files David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 03/16] vcs-svn: make buffer_copy_bytes " David Barr
                     ` (14 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Currently there is no way to detect when input ended if it ended
early during buffer_skip_bytes.  Tell the calling program how many
bytes were actually skipped for easier debugging.

Existing callers will still ignore early EOF.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/line_buffer.c   |   13 +++++++------
 vcs-svn/line_buffer.h   |    2 +-
 vcs-svn/line_buffer.txt |    3 ++-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index 6930aac..a78c5d8 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -120,15 +120,16 @@ void buffer_copy_bytes(struct line_buffer *buf, off_t len)
 	}
 }
 
-void buffer_skip_bytes(struct line_buffer *buf, off_t len)
+off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes)
 {
 	char byte_buffer[COPY_BUFFER_LEN];
-	off_t in;
-	while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) {
-		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
-		in = fread(byte_buffer, 1, in, buf->infile);
-		len -= in;
+	off_t done = 0;
+	while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) {
+		off_t len = nbytes - done;
+		off_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+		done += fread(byte_buffer, 1, in, buf->infile);
 	}
+	return done;
 }
 
 void buffer_reset(struct line_buffer *buf)
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index a090dd6..7d10f9c 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -27,6 +27,6 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len);
 int buffer_read_char(struct line_buffer *buf);
 void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
 void buffer_copy_bytes(struct line_buffer *buf, off_t len);
-void buffer_skip_bytes(struct line_buffer *buf, off_t len);
+off_t buffer_skip_bytes(struct line_buffer *buf, off_t len);
 
 #endif
diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt
index e89cc41..4ef0755 100644
--- a/vcs-svn/line_buffer.txt
+++ b/vcs-svn/line_buffer.txt
@@ -76,7 +76,8 @@ Functions
 
 `buffer_skip_bytes`::
 	Discards `len` bytes from the input stream (stopping early
-	if necessary because of an error or eof).
+	if necessary because of an error or eof).  Return value is
+	the number of bytes successfully read.
 
 `buffer_reset`::
 	Deallocates non-static buffers.
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 03/16] vcs-svn: make buffer_copy_bytes return length read
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
  2011-03-19  7:20   ` [PATCH 01/16] vcs-svn: improve support for reading large files David Barr
  2011-03-19  7:20   ` [PATCH 02/16] vcs-svn: make buffer_skip_bytes return length read David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 04/16] vcs-svn: improve reporting of input errors David Barr
                     ` (13 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Currently buffer_copy_bytes does not report to its caller whether
it encountered an early end of file.

Add a return value representing the number of bytes read (but not
the number of bytes copied).  This way all three unusual conditions
can be distinguished: input error with buffer_ferror, output error
with ferror(outfile), early end of input by checking the return
value.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/line_buffer.c |   18 +++++++++---------
 vcs-svn/line_buffer.h |    3 ++-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index a78c5d8..08d7cae 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -104,20 +104,20 @@ void buffer_read_binary(struct line_buffer *buf,
 	strbuf_fread(sb, size, buf->infile);
 }
 
-void buffer_copy_bytes(struct line_buffer *buf, off_t len)
+off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes)
 {
 	char byte_buffer[COPY_BUFFER_LEN];
-	off_t in;
-	while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) {
-		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+	off_t done = 0;
+	while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) {
+		off_t len = nbytes - done;
+		off_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
 		in = fread(byte_buffer, 1, in, buf->infile);
-		len -= in;
+		done += in;
 		fwrite(byte_buffer, 1, in, stdout);
-		if (ferror(stdout)) {
-			buffer_skip_bytes(buf, len);
-			return;
-		}
+		if (ferror(stdout))
+			return done + buffer_skip_bytes(buf, nbytes - done);
 	}
+	return done;
 }
 
 off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes)
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index 7d10f9c..f5c468a 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -26,7 +26,8 @@ char *buffer_read_line(struct line_buffer *buf);
 char *buffer_read_string(struct line_buffer *buf, uint32_t len);
 int buffer_read_char(struct line_buffer *buf);
 void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
-void buffer_copy_bytes(struct line_buffer *buf, off_t len);
+/* Returns number of bytes read (not necessarily written). */
+off_t buffer_copy_bytes(struct line_buffer *buf, off_t len);
 off_t buffer_skip_bytes(struct line_buffer *buf, off_t len);
 
 #endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 04/16] vcs-svn: improve reporting of input errors
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (2 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 03/16] vcs-svn: make buffer_copy_bytes " David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 05/16] vcs-svn: learn to maintain a sliding view of a file David Barr
                     ` (12 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Catch input errors and exit early enough to print a reasonable
diagnosis based on errno.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/fast_export.c |   13 +++++++++++--
 vcs-svn/svndump.c     |   22 +++++++++++++++++++---
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index 1d50512..4748253 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -120,15 +120,24 @@ static const char *get_response_line(void)
 	die("unexpected end of fast-import feedback");
 }
 
+static void die_short_read(struct line_buffer *input)
+{
+	if (buffer_ferror(input))
+		die_errno("error reading dump file");
+	die("invalid dump: unexpected end of file");
+}
+
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
 {
 	if (mode == REPO_MODE_LNK) {
 		/* svn symlink blobs start with "link " */
-		buffer_skip_bytes(input, 5);
 		len -= 5;
+		if (buffer_skip_bytes(input, 5) != 5)
+			die_short_read(input);
 	}
 	printf("data %"PRIu32"\n", len);
-	buffer_copy_bytes(input, len);
+	if (buffer_copy_bytes(input, len) != len)
+		die_short_read(input);
 	fputc('\n', stdout);
 }
 
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 7bc2d3d..dbb9c16 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -133,6 +133,13 @@ static void handle_property(const char *key, const char *val, uint32_t len,
 	}
 }
 
+static void die_short_read(void)
+{
+	if (buffer_ferror(&input))
+		die_errno("error reading dump file");
+	die("invalid dump: unexpected end of file");
+}
+
 static void read_props(void)
 {
 	char key[16] = {0};
@@ -159,7 +166,9 @@ static void read_props(void)
 			die("invalid property line: %s\n", t);
 		len = atoi(&t[2]);
 		val = buffer_read_string(&input, len);
-		buffer_skip_bytes(&input, 1);	/* Discard trailing newline. */
+		/* Discard trailing newline. */
+		if (buffer_skip_bytes(&input, 1) != 1)
+			die_short_read();
 
 		switch (type) {
 		case 'K':
@@ -402,7 +411,11 @@ void svndump_read(const char *url)
 			if (memcmp(t, "Content-length", 14))
 				continue;
 			len = atoi(val);
-			buffer_read_line(&input);
+			t = buffer_read_line(&input);
+			if (!t)
+				die_short_read();
+			if (*t)
+				die("invalid dump: expected blank line after content length header");
 			if (active_ctx == REV_CTX) {
 				read_props();
 			} else if (active_ctx == NODE_CTX) {
@@ -410,10 +423,13 @@ void svndump_read(const char *url)
 				active_ctx = INTERNODE_CTX;
 			} else {
 				fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
-				buffer_skip_bytes(&input, len);
+				if (buffer_skip_bytes(&input, len) != len)
+					die_short_read();
 			}
 		}
 	}
+	if (buffer_ferror(&input))
+		die_short_read();
 	if (active_ctx == NODE_CTX)
 		handle_node();
 	if (active_ctx == REV_CTX)
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 05/16] vcs-svn: learn to maintain a sliding view of a file
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (3 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 04/16] vcs-svn: improve reporting of input errors David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 06/16] vcs-svn: skeleton of an svn delta parser David Barr
                     ` (11 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Each section of a Subversion-format delta only requires examining (and
keeping in random-access memory) a small portion of the preimage.  At
any moment, this portion starts at a certain file offset and has a
well-defined length, and as the delta is applied, the portion moves
from the beginning to the end of the preimage.  Add a move_window
function to keep track of this view into the preimage.

You can use it like this:

	buffer_init(f, NULL);
	struct sliding_view window = SLIDING_VIEW_INIT(f);
	move_window(&window, 3, 7);	/* (1) */
	move_window(&window, 5, 5);	/* (2) */
	move_window(&window, 12, 2);	/* (3) */
	strbuf_release(&window.buf);
	buffer_deinit(f);

The data structure is called sliding_view instead of _window to
prevent confusion with svndiff0 Windows.

In this example, (1) reads 10 bytes and discards the first 3;
(2) discards the first 2, which are not needed any more; and (3) skips
2 bytes and reads 2 new bytes to work with.

When move_window returns, the file position indicator is at position
window->off + window->width and the data from positions window->off to
the current file position are stored in window->buf.

This function performs only sequential access from the input file and
never seeks, so it can be safely used on pipes and sockets.

On end-of-file, move_window silently reads less than the caller
requested.  On other errors, it prints a message and returns -1.

Helped-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 Makefile                 |    5 ++-
 vcs-svn/LICENSE          |    2 +
 vcs-svn/sliding_window.c |   74 ++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/sliding_window.h |   17 ++++++++++
 4 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 vcs-svn/sliding_window.c
 create mode 100644 vcs-svn/sliding_window.h

diff --git a/Makefile b/Makefile
index 6165609..bac415d 100644
--- a/Makefile
+++ b/Makefile
@@ -1836,7 +1836,8 @@ endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
 VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
-	vcs-svn/fast_export.o vcs-svn/svndump.o
+	vcs-svn/fast_export.o vcs-svn/svndump.o \
+	vcs-svn/sliding_window.o
 VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o test-treap.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
@@ -1962,7 +1963,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
-	vcs-svn/svndump.h
+	vcs-svn/svndump.h vcs-svn/sliding_window.h
 
 test-svn-fe.o: vcs-svn/svndump.h
 endif
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE
index 533f585..eb91858 100644
--- a/vcs-svn/LICENSE
+++ b/vcs-svn/LICENSE
@@ -1,6 +1,8 @@
 Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
 All rights reserved.
 
+Copyright (C) 2010 Jonathan Nieder <jrnieder@gmail.com>.
+
 Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH,
 Frankfurt/Main, Germany
 and others, see http://svn2cc.sarovar.org
diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c
new file mode 100644
index 0000000..9ce399c
--- /dev/null
+++ b/vcs-svn/sliding_window.c
@@ -0,0 +1,74 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "sliding_window.h"
+#include "line_buffer.h"
+#include "strbuf.h"
+
+static int input_error(struct line_buffer *file)
+{
+	if (!buffer_ferror(file))
+		return error("delta preimage ends early");
+	return error("cannot read delta preimage: %s", strerror(errno));
+}
+
+static int skip_or_whine(struct line_buffer *file, off_t gap)
+{
+	const off_t nread = buffer_skip_bytes(file, gap);
+	return nread == gap ? 0 : input_error(file);
+}
+
+static int read_to_fill_or_whine(struct line_buffer *file,
+				struct strbuf *buf, size_t width)
+{
+	buffer_read_binary(file, buf, width - buf->len);
+	return buf->len == width ? 0 : input_error(file);
+}
+
+static int check_overflow(off_t a, size_t b)
+{
+	if (b > maximum_signed_value_of_type(off_t))
+		return error("unrepresentable length in delta: "
+				"%"PRIuMAX" > OFF_MAX", (uintmax_t) b);
+	if (signed_add_overflows(a, (off_t) b))
+		return error("unrepresentable offset in delta: "
+				"%"PRIuMAX" + %"PRIuMAX" > OFF_MAX",
+				(uintmax_t) a, (uintmax_t) b);
+	return 0;
+}
+
+int move_window(struct sliding_view *view, off_t off, size_t width)
+{
+	off_t file_offset;
+	assert(view);
+	assert(view->width <= view->buf.len);
+	assert(!check_overflow(view->off, view->buf.len));
+
+	if (check_overflow(off, width))
+		return -1;
+	if (off < view->off || off + width < view->off + view->width)
+		return error("invalid delta: window slides left");
+
+	file_offset = view->off + view->buf.len;
+	if (off < file_offset) {
+		/* Move the overlapping region into place. */
+		strbuf_remove(&view->buf, 0, off - view->off);
+	} else {
+		/* Seek ahead to skip the gap. */
+		if (skip_or_whine(view->file, off - file_offset))
+			return -1;
+		strbuf_setlen(&view->buf, 0);
+	}
+
+	if (view->buf.len > width)
+		; /* Already read. */
+	else if (read_to_fill_or_whine(view->file, &view->buf, width))
+		return -1;
+
+	view->off = off;
+	view->width = width;
+	return 0;
+}
diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h
new file mode 100644
index 0000000..ed0bfdd
--- /dev/null
+++ b/vcs-svn/sliding_window.h
@@ -0,0 +1,17 @@
+#ifndef SLIDING_WINDOW_H_
+#define SLIDING_WINDOW_H_
+
+#include "strbuf.h"
+
+struct sliding_view {
+	struct line_buffer *file;
+	off_t off;
+	size_t width;
+	struct strbuf buf;
+};
+
+#define SLIDING_VIEW_INIT(input)	{ (input), 0, 0, STRBUF_INIT }
+
+extern int move_window(struct sliding_view *view, off_t off, size_t width);
+
+#endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 06/16] vcs-svn: skeleton of an svn delta parser
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (4 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 05/16] vcs-svn: learn to maintain a sliding view of a file David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-28  3:30     ` Jonathan Nieder
  2011-03-19  7:20   ` [PATCH 07/16] vcs-svn: parse svndiff0 window header David Barr
                     ` (10 subsequent siblings)
  16 siblings, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

A delta in the subversion delta (svndiff0) format consists of the
magic bytes SVN\0 followed by a sequence of windows of a certain
well specified format (starting with five integers).

Add an svndiff0_apply function and test-svn-fe -d commandline tool to
parse such a delta in the special case of not including any windows.

Later patches will add features to turn this into a fully functional
delta applier for use by svn-fe in parsing the streams produced by
"svnrdump dump" and "svnadmin dump --deltas".

The content of symlinks starts with the word "link " in Subversion's
worldview, so we will need to prepend that text for the sake of
delta application.  Initialization of the input state of the
delta preimage is left to the calling program, which gives callers
a chance to seed the sliding window with text of their choice.

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 Makefile              |    4 +-
 t/t9011-svn-da.sh     |   37 ++++++++++++++++++++++++++++++++++
 test-svn-fe.c         |   42 ++++++++++++++++++++++++++++++++-------
 vcs-svn/line_buffer.c |    6 ++--
 vcs-svn/line_buffer.h |    2 +-
 vcs-svn/svndiff.c     |   52 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.h     |   10 +++++++++
 7 files changed, 139 insertions(+), 14 deletions(-)
 create mode 100755 t/t9011-svn-da.sh
 create mode 100644 vcs-svn/svndiff.c
 create mode 100644 vcs-svn/svndiff.h

diff --git a/Makefile b/Makefile
index bac415d..bc6690a 100644
--- a/Makefile
+++ b/Makefile
@@ -1837,7 +1837,7 @@ XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
 VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
 	vcs-svn/fast_export.o vcs-svn/svndump.o \
-	vcs-svn/sliding_window.o
+	vcs-svn/sliding_window.o vcs-svn/svndiff.o
 VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o test-treap.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
@@ -1963,7 +1963,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
-	vcs-svn/svndump.h vcs-svn/sliding_window.h
+	vcs-svn/sliding_window.h vcs-svn/svndiff.h vcs-svn/svndump.h
 
 test-svn-fe.o: vcs-svn/svndump.h
 endif
diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
new file mode 100755
index 0000000..6d6a406
--- /dev/null
+++ b/t/t9011-svn-da.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+test_description='test parsing of svndiff0 files
+
+Using the "test-svn-fe -d" helper, check that svn-fe correctly
+interprets deltas using various facilities (some from the spec,
+some only learned from practice).
+'
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	>empty &&
+	printf foo >preimage
+'
+
+test_expect_success 'reject empty delta' '
+	test_must_fail test-svn-fe -d preimage empty 0
+'
+
+test_expect_success 'delta can empty file' '
+	printf "SVNQ" | q_to_nul >clear.delta &&
+	test-svn-fe -d preimage clear.delta 4 >actual &&
+	test_cmp empty actual
+'
+
+test_expect_success 'reject svndiff2' '
+	printf "SVN\002" >bad.filetype &&
+	test_must_fail test-svn-fe -d preimage bad.filetype 4
+'
+
+test_expect_failure 'one-window empty delta' '
+	printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow &&
+	test-svn-fe -d preimage clear.onewindow 9 >actual &&
+	test_cmp empty actual
+'
+
+test_done
diff --git a/test-svn-fe.c b/test-svn-fe.c
index b42ba78..6558b52 100644
--- a/test-svn-fe.c
+++ b/test-svn-fe.c
@@ -4,15 +4,41 @@
 
 #include "git-compat-util.h"
 #include "vcs-svn/svndump.h"
+#include "vcs-svn/svndiff.h"
+#include "vcs-svn/sliding_window.h"
+#include "vcs-svn/line_buffer.h"
 
 int main(int argc, char *argv[])
 {
-	if (argc != 2)
-		usage("test-svn-fe <file>");
-	if (svndump_init(argv[1]))
-		return 1;
-	svndump_read(NULL);
-	svndump_deinit();
-	svndump_reset();
-	return 0;
+	static const char test_svnfe_usage[] =
+		"test-svn-fe (<dumpfile> | [-d] <preimage> <delta> <len>)";
+	if (argc == 2) {
+		if (svndump_init(argv[1]))
+			return 1;
+		svndump_read(NULL);
+		svndump_deinit();
+		svndump_reset();
+		return 0;
+	}
+	if (argc == 5 && !strcmp(argv[1], "-d")) {
+		struct line_buffer preimage = LINE_BUFFER_INIT;
+		struct line_buffer delta = LINE_BUFFER_INIT;
+		struct sliding_view preimage_view = SLIDING_VIEW_INIT(&preimage);
+		if (buffer_init(&preimage, argv[2]))
+			die_errno("cannot open preimage");
+		if (buffer_init(&delta, argv[3]))
+			die_errno("cannot open delta");
+		if (svndiff0_apply(&delta, (off_t) strtoull(argv[4], NULL, 0),
+				   &preimage_view, stdout))
+			return 1;
+		if (buffer_deinit(&preimage))
+			die_errno("cannot close preimage");
+		if (buffer_deinit(&delta))
+			die_errno("cannot close delta");
+		buffer_reset(&preimage);
+		strbuf_release(&preimage_view.buf);
+		buffer_reset(&delta);
+		return 0;
+	}
+	usage(test_svnfe_usage);
 }
diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index 08d7cae..2f59670 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -98,10 +98,10 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len)
 	return ferror(buf->infile) ? NULL : buf->blob_buffer.buf;
 }
 
-void buffer_read_binary(struct line_buffer *buf,
-				struct strbuf *sb, uint32_t size)
+off_t buffer_read_binary(struct line_buffer *buf,
+				struct strbuf *sb, off_t size)
 {
-	strbuf_fread(sb, size, buf->infile);
+	return strbuf_fread(sb, size, buf->infile);
 }
 
 off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes)
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index f5c468a..a253070 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -25,7 +25,7 @@ int buffer_ferror(struct line_buffer *buf);
 char *buffer_read_line(struct line_buffer *buf);
 char *buffer_read_string(struct line_buffer *buf, uint32_t len);
 int buffer_read_char(struct line_buffer *buf);
-void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
+off_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, off_t len);
 /* Returns number of bytes read (not necessarily written). */
 off_t buffer_copy_bytes(struct line_buffer *buf, off_t len);
 off_t buffer_skip_bytes(struct line_buffer *buf, off_t len);
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
new file mode 100644
index 0000000..5916036
--- /dev/null
+++ b/vcs-svn/svndiff.c
@@ -0,0 +1,52 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "line_buffer.h"
+#include "svndiff.h"
+
+/*
+ * svndiff0 applier
+ *
+ * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff.
+ *
+ * svndiff0 ::= 'SVN\0' window*
+ */
+
+static int error_short_read(struct line_buffer *input)
+{
+	if (buffer_ferror(input))
+		return error("error reading delta: %s", strerror(errno));
+	return error("invalid delta: unexpected end of file");
+}
+
+static int read_magic(struct line_buffer *in, off_t *len)
+{
+	static const char magic[] = {'S', 'V', 'N', '\0'};
+	struct strbuf sb = STRBUF_INIT;
+
+	if (*len < sizeof(magic) ||
+	    buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic))
+		return error_short_read(in);
+
+	if (memcmp(sb.buf, magic, sizeof(magic)))
+		return error("invalid delta: unrecognized file type");
+
+	*len -= sizeof(magic);
+	strbuf_release(&sb);
+	return 0;
+}
+
+int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
+			struct sliding_view *preimage, FILE *postimage)
+{
+	assert(delta && preimage && postimage);
+
+	if (read_magic(delta, &delta_len))
+		return -1;
+	if (delta_len)
+		return error("What do you think I am?  A delta applier?");
+	return 0;
+}
diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h
new file mode 100644
index 0000000..74eb464
--- /dev/null
+++ b/vcs-svn/svndiff.h
@@ -0,0 +1,10 @@
+#ifndef SVNDIFF_H_
+#define SVNDIFF_H_
+
+struct line_buffer;
+struct sliding_view;
+
+extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
+		struct sliding_view *preimage, FILE *postimage);
+
+#endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 07/16] vcs-svn: parse svndiff0 window header
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (5 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 06/16] vcs-svn: skeleton of an svn delta parser David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 08/16] vcs-svn: read the preimage when applying deltas David Barr
                     ` (9 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Each window in a subversion delta (svndiff0-format file) starts with a
window header, consisting of five integers with variable-length
representation:

	source view offset
	source view length
	output length
	instructions length
	auxiliary data length

Parse it.  The result is not usable for deltas with nonempty postimage
yet; in fact, this only adds support for deltas without any
instructions or auxiliary data.  This is a good place to stop, though,
since that little support lets us add some simple passing tests
concerning error handling to the test suite.

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   56 +++++++++++++++++++++++++++++++-
 vcs-svn/svndiff.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 142 insertions(+), 6 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index 6d6a406..2ab580a 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -28,10 +28,64 @@ test_expect_success 'reject svndiff2' '
 	test_must_fail test-svn-fe -d preimage bad.filetype 4
 '
 
-test_expect_failure 'one-window empty delta' '
+test_expect_success 'one-window empty delta' '
 	printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow &&
 	test-svn-fe -d preimage clear.onewindow 9 >actual &&
 	test_cmp empty actual
 '
 
+test_expect_success 'reject incomplete window header' '
+	printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow &&
+	printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow &&
+	test_must_fail test-svn-fe -d preimage clear.onewindow 6 &&
+	test_must_fail test-svn-fe -d preimage clear.partialwindow 6
+'
+
+test_expect_success 'reject declared delta longer than actual delta' '
+	printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow &&
+	printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow &&
+	test_must_fail test-svn-fe -d preimage clear.onewindow 14 &&
+	test_must_fail test-svn-fe -d preimage clear.partialwindow 9
+'
+
+test_expect_success 'two-window empty delta' '
+	printf "SVNQ%s%s" "QQQQQ" "QQQQQ" | q_to_nul >clear.twowindow &&
+	test-svn-fe -d preimage clear.twowindow 14 >actual &&
+	test_must_fail test-svn-fe -d preimage clear.twowindow 13 &&
+	test_cmp empty actual
+'
+
+test_expect_success 'noisy zeroes' '
+	printf "SVNQ%s" \
+		"RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRQQQQQ" |
+		tr R "\200" |
+		q_to_nul >clear.noisy &&
+	len=$(wc -c <clear.noisy) &&
+	test-svn-fe -d preimage clear.noisy $len >actual &&
+	test_cmp empty actual
+'
+
+test_expect_success 'reject variable-length int in magic' '
+	printf "SVNRQ" | tr R "\200" | q_to_nul >clear.badmagic &&
+	test_must_fail test-svn-fe -d preimage clear.badmagic 5
+'
+
+test_expect_success 'reject truncated integer' '
+	printf "SVNQ%s%s" "QQQQQ" "QQQQRRQ" |
+		tr R "\200" |
+		q_to_nul >clear.fullint &&
+	printf "SVNQ%s%s" "QQQQQ" "QQQQRR" |
+		tr R "\201" |
+		q_to_nul >clear.partialint &&
+	test_must_fail test-svn-fe -d preimage clear.fullint 15 &&
+	test-svn-fe -d preimage clear.fullint 16 &&
+	test_must_fail test-svn-fe -d preimage clear.partialint 15
+'
+
+test_expect_success 'nonempty (but unused) preimage view' '
+	printf "SVNQ%b" "Q\003QQQ" | q_to_nul >clear.readpreimage &&
+	test-svn-fe -d preimage clear.readpreimage 9 >actual &&
+	test_cmp empty actual
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 5916036..249efb6 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -13,8 +13,16 @@
  * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff.
  *
  * svndiff0 ::= 'SVN\0' window*
+ * window ::= int int int int int instructions inline_data;
+ * int ::= highdigit* lowdigit;
+ * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value;
+ * lowdigit ::= # 7 bit value;
  */
 
+#define VLI_CONTINUE	0x80
+#define VLI_DIGIT_MASK	0x7f
+#define VLI_BITS_PER_DIGIT 7
+
 static int error_short_read(struct line_buffer *input)
 {
 	if (buffer_ferror(input))
@@ -28,17 +36,84 @@ static int read_magic(struct line_buffer *in, off_t *len)
 	struct strbuf sb = STRBUF_INIT;
 
 	if (*len < sizeof(magic) ||
-	    buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic))
-		return error_short_read(in);
+	    buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) {
+		error_short_read(in);
+		strbuf_release(&sb);
+		return -1;
+	}
 
-	if (memcmp(sb.buf, magic, sizeof(magic)))
+	if (memcmp(sb.buf, magic, sizeof(magic))) {
+		strbuf_release(&sb);
 		return error("invalid delta: unrecognized file type");
+	}
 
 	*len -= sizeof(magic);
 	strbuf_release(&sb);
 	return 0;
 }
 
+static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len)
+{
+	uintmax_t rv = 0;
+	off_t sz;
+	for (sz = *len; sz; sz--) {
+		const int ch = buffer_read_char(in);
+		if (ch == EOF)
+			break;
+
+		rv <<= VLI_BITS_PER_DIGIT;
+		rv += (ch & VLI_DIGIT_MASK);
+		if (ch & VLI_CONTINUE)
+			continue;
+
+		*result = rv;
+		*len = sz - 1;
+		return 0;
+	}
+	return error_short_read(in);
+}
+
+static int read_offset(struct line_buffer *in, off_t *result, off_t *len)
+{
+	uintmax_t val;
+	if (read_int(in, &val, len))
+		return -1;
+	if (val > maximum_signed_value_of_type(off_t))
+		return error("unrepresentable offset in delta: %"PRIuMAX"", val);
+	*result = val;
+	return 0;
+}
+
+static int read_length(struct line_buffer *in, size_t *result, off_t *len)
+{
+	uintmax_t val;
+	if (read_int(in, &val, len))
+		return -1;
+	if (val > SIZE_MAX)
+		return error("unrepresentable length in delta: %"PRIuMAX"", val);
+	*result = val;
+	return 0;
+}
+
+static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
+{
+	size_t out_len;
+	size_t instructions_len;
+	size_t data_len;
+	assert(delta_len);
+
+	/* "source view" offset and length already handled; */
+	if (read_length(delta, &out_len, delta_len) ||
+	    read_length(delta, &instructions_len, delta_len) ||
+	    read_length(delta, &data_len, delta_len))
+		return -1;
+	if (instructions_len)
+		return error("What do you think I am?  A delta applier?");
+	if (data_len)
+		return error("No support for inline data yet");
+	return 0;
+}
+
 int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 			struct sliding_view *preimage, FILE *postimage)
 {
@@ -46,7 +121,14 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 
 	if (read_magic(delta, &delta_len))
 		return -1;
-	if (delta_len)
-		return error("What do you think I am?  A delta applier?");
+	while (delta_len) {	/* For each window: */
+		off_t pre_off;
+		size_t pre_len;
+
+		if (read_offset(delta, &pre_off, &delta_len) ||
+		    read_length(delta, &pre_len, &delta_len) ||
+		    apply_one_window(delta, &delta_len))
+			return -1;
+	}
 	return 0;
 }
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 08/16] vcs-svn: read the preimage when applying deltas
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (6 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 07/16] vcs-svn: parse svndiff0 window header David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 09/16] vcs-svn: read inline data from deltas David Barr
                     ` (8 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

The source view offset heading each svndiff0 window represents a
number of bytes past the beginning of the preimage.  Together with the
source view length, it dictates to the delta applier what portion of
the preimage instructions will refer to.  Read that portion right away
using the sliding window code.

Maybe some day we will use mmap to read data more lazily.

Subversion's implementation tolerates source view offsets pointing
past the end of the preimage file but we do not, for simplicity.

This does not teach the delta applier to read instructions or copy
data from the source view.  Deltas that could produce nonempty output
will still be rejected.

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   35 +++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.c |    6 ++++--
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index 2ab580a..986dce6 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -88,4 +88,39 @@ test_expect_success 'nonempty (but unused) preimage view' '
 	test_cmp empty actual
 '
 
+test_expect_success 'preimage view: right endpoint cannot backtrack' '
+	printf "SVNQ%b%b" "Q\003QQQ" "Q\002QQQ" |
+		q_to_nul >clear.backtrack &&
+	test_must_fail test-svn-fe -d preimage clear.backtrack 14
+'
+
+test_expect_success 'preimage view: left endpoint can advance' '
+	printf "SVNQ%b%b" "Q\003QQQ" "\001\002QQQ" |
+		q_to_nul >clear.preshrink &&
+	printf "SVNQ%b%b" "Q\003QQQ" "\001\001QQQ" |
+		q_to_nul >clear.shrinkbacktrack &&
+	test-svn-fe -d preimage clear.preshrink 14 >actual &&
+	test_must_fail test-svn-fe -d preimage clear.shrinkbacktrack 14 &&
+	test_cmp empty actual
+'
+
+test_expect_success 'preimage view: offsets compared by value' '
+	printf "SVNQ%b%b" "\001\001QQQ" "\0200Q\003QQQ" |
+		q_to_nul >clear.noisybacktrack &&
+	printf "SVNQ%b%b" "\001\001QQQ" "\0200\001\002QQQ" |
+		q_to_nul >clear.noisyadvance &&
+	test_must_fail test-svn-fe -d preimage clear.noisybacktrack 15 &&
+	test-svn-fe -d preimage clear.noisyadvance 15 >actual &&
+	test_cmp empty actual
+'
+
+test_expect_success 'preimage view: reject truncated preimage' '
+	printf "SVNQ%b" "\010QQQQ" | q_to_nul >clear.lateemptyread &&
+	printf "SVNQ%b" "\010\001QQQ" | q_to_nul >clear.latenonemptyread &&
+	printf "SVNQ%b" "\001\010QQQ" | q_to_nul >clear.longread &&
+	test_must_fail test-svn-fe -d preimage clear.lateemptyread 9 &&
+	test_must_fail test-svn-fe -d preimage clear.latenonemptyread 9 &&
+	test_must_fail test-svn-fe -d preimage clear.longread 9
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 249efb6..1020275 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -4,6 +4,7 @@
  */
 
 #include "git-compat-util.h"
+#include "sliding_window.h"
 #include "line_buffer.h"
 #include "svndiff.h"
 
@@ -122,11 +123,12 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 	if (read_magic(delta, &delta_len))
 		return -1;
 	while (delta_len) {	/* For each window: */
-		off_t pre_off;
-		size_t pre_len;
+		off_t pre_off = pre_off;
+		size_t pre_len = pre_len;
 
 		if (read_offset(delta, &pre_off, &delta_len) ||
 		    read_length(delta, &pre_len, &delta_len) ||
+		    move_window(preimage, pre_off, pre_len) ||
 		    apply_one_window(delta, &delta_len))
 			return -1;
 	}
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 09/16] vcs-svn: read inline data from deltas
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (7 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 08/16] vcs-svn: read the preimage when applying deltas David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 10/16] vcs-svn: read instructions " David Barr
                     ` (7 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Each window of an svndiff0-format delta includes a section for new
data that will be copied into the postimage (in the order it appears in
the window, possibly interspersed with other data).

Read this data when encountering it.  It is not actually necessary to
do so --- it would be just as easy to copy straight from the delta
to output when interpreting the relevant instructions --- but this
way, the code that interprets svndiff0 instructions can proceed more
quickly because it does not require any I/O.

Subversion's implementation rejects deltas that do not consume all
the auxiliary data that is available.  Do not check that for now,
because it would make it impossible to test the function of this
patch until the instructions to consume data are implemented.

Do check for truncated data sections.  Since Subversion's applier
rejects deltas that end before the new-data section is declared to
end, it should be safe for this applier to reject such deltas, too.

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   12 ++++++++++++
 vcs-svn/svndiff.c |   46 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index 986dce6..4919fae 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -123,4 +123,16 @@ test_expect_success 'preimage view: reject truncated preimage' '
 	test_must_fail test-svn-fe -d preimage clear.longread 9
 '
 
+test_expect_success 'inline data' '
+	printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" |
+		q_to_nul >inline.clear &&
+	test-svn-fe -d preimage inline.clear 18 >actual &&
+	test_cmp empty actual
+'
+
+test_expect_success 'reject truncated inline data' '
+	printf "SVNQ%b%s" "QQQQ\003" "b" | q_to_nul >inline.trunc &&
+	test_must_fail test-svn-fe -d preimage inline.trunc 10
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 1020275..74e0401 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -24,6 +24,17 @@
 #define VLI_DIGIT_MASK	0x7f
 #define VLI_BITS_PER_DIGIT 7
 
+struct window {
+	struct strbuf data;
+};
+
+#define WINDOW_INIT	{ STRBUF_INIT }
+
+static void window_release(struct window *ctx)
+{
+	strbuf_release(&ctx->data);
+}
+
 static int error_short_read(struct line_buffer *input)
 {
 	if (buffer_ferror(input))
@@ -31,24 +42,30 @@ static int error_short_read(struct line_buffer *input)
 	return error("invalid delta: unexpected end of file");
 }
 
+static int read_chunk(struct line_buffer *delta, off_t *delta_len,
+		      struct strbuf *buf, size_t len)
+{
+	strbuf_reset(buf);
+	if (len > *delta_len ||
+	    buffer_read_binary(delta, buf, len) != len)
+		return error_short_read(delta);
+	*delta_len -= buf->len;
+	return 0;
+}
+
 static int read_magic(struct line_buffer *in, off_t *len)
 {
 	static const char magic[] = {'S', 'V', 'N', '\0'};
 	struct strbuf sb = STRBUF_INIT;
 
-	if (*len < sizeof(magic) ||
-	    buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) {
-		error_short_read(in);
+	if (read_chunk(in, len, &sb, sizeof(magic))) {
 		strbuf_release(&sb);
 		return -1;
 	}
-
 	if (memcmp(sb.buf, magic, sizeof(magic))) {
 		strbuf_release(&sb);
 		return error("invalid delta: unrecognized file type");
 	}
-
-	*len -= sizeof(magic);
 	strbuf_release(&sb);
 	return 0;
 }
@@ -98,6 +115,7 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len)
 
 static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
 {
+	struct window ctx = WINDOW_INIT;
 	size_t out_len;
 	size_t instructions_len;
 	size_t data_len;
@@ -107,12 +125,18 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
 	if (read_length(delta, &out_len, delta_len) ||
 	    read_length(delta, &instructions_len, delta_len) ||
 	    read_length(delta, &data_len, delta_len))
-		return -1;
-	if (instructions_len)
-		return error("What do you think I am?  A delta applier?");
-	if (data_len)
-		return error("No support for inline data yet");
+		goto error_out;
+	if (instructions_len) {
+		error("What do you think I am?  A delta applier?");
+		goto error_out;
+	}
+	if (read_chunk(delta, delta_len, &ctx.data, data_len))
+		goto error_out;
+	window_release(&ctx);
 	return 0;
+error_out:
+	window_release(&ctx);
+	return -1;
 }
 
 int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 10/16] vcs-svn: read instructions from deltas
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (8 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 09/16] vcs-svn: read inline data from deltas David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 11/16] vcs-svn: implement copyfrom_data delta instruction David Barr
                     ` (6 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Buffer the instruction section upon encountering it for later
interpretation.

An alternative design would involve parsing the instructions
at this point and buffering them in some processed form.  Using
the unprocessed form is simpler.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |    5 +++++
 vcs-svn/svndiff.c |    6 +++++-
 2 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index 4919fae..e909d92 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -135,4 +135,9 @@ test_expect_success 'reject truncated inline data' '
 	test_must_fail test-svn-fe -d preimage inline.trunc 10
 '
 
+test_expect_success 'reject truncated inline data (after instruction section)' '
+	printf "SVNQ%b%b%s" "QQ\001\001\003" "\0201" "b" | q_to_nul >insn.trunc &&
+	test_must_fail test-svn-fe -d preimage insn.trunc 11
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 74e0401..a2dd9b6 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -25,13 +25,15 @@
 #define VLI_BITS_PER_DIGIT 7
 
 struct window {
+	struct strbuf instructions;
 	struct strbuf data;
 };
 
-#define WINDOW_INIT	{ STRBUF_INIT }
+#define WINDOW_INIT	{ STRBUF_INIT, STRBUF_INIT }
 
 static void window_release(struct window *ctx)
 {
+	strbuf_release(&ctx->instructions);
 	strbuf_release(&ctx->data);
 }
 
@@ -126,6 +128,8 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
 	    read_length(delta, &instructions_len, delta_len) ||
 	    read_length(delta, &data_len, delta_len))
 		goto error_out;
+	if (read_chunk(delta, delta_len, &ctx.instructions, instructions_len))
+		goto error_out;
 	if (instructions_len) {
 		error("What do you think I am?  A delta applier?");
 		goto error_out;
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 11/16] vcs-svn: implement copyfrom_data delta instruction
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (9 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 10/16] vcs-svn: read instructions " David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 12/16] vcs-svn: verify that deltas consume all inline data David Barr
                     ` (5 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

The copyfrom_data instruction copies a few bytes verbatim from the
auxiliary data section of a window to the postimage.

[jn: with memory leak fix from David]

Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   31 ++++++++++++++
 vcs-svn/svndiff.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 139 insertions(+), 6 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index e909d92..c6dbb21 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -140,4 +140,35 @@ test_expect_success 'reject truncated inline data (after instruction section)' '
 	test_must_fail test-svn-fe -d preimage insn.trunc 11
 '
 
+test_expect_success 'copyfrom_data' '
+	echo hi >expect &&
+	printf "SVNQ%b%b%b" "QQ\003\001\003" "\0203" "hi\n" | q_to_nul >copydat &&
+	test-svn-fe -d preimage copydat 13 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'multiple copyfrom_data' '
+	echo hi >expect &&
+	printf "SVNQ%b%b%b%b%b" "QQ\003\002\003" "\0201\0202" "hi\n" \
+		"QQQ\002Q" "\0200Q" | q_to_nul >copy.multi &&
+	len=$(wc -c <copy.multi) &&
+	test-svn-fe -d preimage copy.multi $len >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'incomplete multiple insn' '
+	printf "SVNQ%b%b%b" "QQ\003\002\003" "\0203\0200" "hi\n" |
+		q_to_nul >copy.partial &&
+	len=$(wc -c <copy.partial) &&
+	test_must_fail test-svn-fe -d preimage copy.partial $len
+'
+
+test_expect_success 'catch attempt to copy missing data' '
+	printf "SVNQ%b%b%s%b%s" "QQ\002\002\001" "\0201\0201" "X" \
+			"QQQQ\002" "YZ" |
+		q_to_nul >copy.incomplete &&
+	len=$(wc -c <copy.incomplete) &&
+	test_must_fail test-svn-fe -d preimage copy.incomplete $len
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index a2dd9b6..118e8c6 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -15,28 +15,49 @@
  *
  * svndiff0 ::= 'SVN\0' window*
  * window ::= int int int int int instructions inline_data;
+ * instructions ::= instruction*;
+ * instruction ::= view_selector int int
+ *   | copyfrom_data int
+ *   | packed_view_selector int
+ *   | packed_copyfrom_data
+ *   ;
+ * copyfrom_data ::= # binary 10 000000;
+ * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value;
  * int ::= highdigit* lowdigit;
  * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value;
  * lowdigit ::= # 7 bit value;
  */
 
+#define INSN_MASK	0xc0
+#define INSN_COPYFROM_DATA	0x80
+#define OPERAND_MASK	0x3f
+
 #define VLI_CONTINUE	0x80
 #define VLI_DIGIT_MASK	0x7f
 #define VLI_BITS_PER_DIGIT 7
 
 struct window {
+	struct strbuf out;
 	struct strbuf instructions;
 	struct strbuf data;
 };
 
-#define WINDOW_INIT	{ STRBUF_INIT, STRBUF_INIT }
+#define WINDOW_INIT	{ STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }
 
 static void window_release(struct window *ctx)
 {
+	strbuf_release(&ctx->out);
 	strbuf_release(&ctx->instructions);
 	strbuf_release(&ctx->data);
 }
 
+static int write_strbuf(struct strbuf *sb, FILE *out)
+{
+	if (fwrite(sb->buf, 1, sb->len, out) == sb->len)	/* Success. */
+		return 0;
+	return error("cannot write delta postimage: %s", strerror(errno));
+}
+
 static int error_short_read(struct line_buffer *input)
 {
 	if (buffer_ferror(input))
@@ -93,6 +114,25 @@ static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len)
 	return error_short_read(in);
 }
 
+static int parse_int(const char **buf, size_t *result, const char *end)
+{
+	size_t rv = 0;
+	const char *pos;
+	for (pos = *buf; pos != end; pos++) {
+		unsigned char ch = *pos;
+
+		rv <<= VLI_BITS_PER_DIGIT;
+		rv += (ch & VLI_DIGIT_MASK);
+		if (ch & VLI_CONTINUE)
+			continue;
+
+		*result = rv;
+		*buf = pos + 1;
+		return 0;
+	}
+	return error("invalid delta: unexpected end of instructions section");
+}
+
 static int read_offset(struct line_buffer *in, off_t *result, off_t *len)
 {
 	uintmax_t val;
@@ -115,7 +155,64 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len)
 	return 0;
 }
 
-static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
+static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes)
+{
+	const size_t pos = *data_pos;
+	if (unsigned_add_overflows(pos, nbytes) ||
+	    pos + nbytes > ctx->data.len)
+		return error("invalid delta: copies unavailable inline data.");
+	strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes);
+	*data_pos += nbytes;
+	return 0;
+}
+
+static int parse_first_operand(const char **buf, size_t *out, const char *end)
+{
+	size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK;
+	if (result) {	/* immediate operand */
+		*out = result;
+		return 0;
+	}
+	return parse_int(buf, out, end);
+}
+
+static int execute_one_instruction(struct window *ctx,
+				const char **instructions, size_t *data_pos)
+{
+	unsigned int instruction;
+	const char *insns_end = ctx->instructions.buf + ctx->instructions.len;
+	size_t nbytes;
+	assert(ctx);
+	assert(instructions && *instructions);
+	assert(data_pos);
+
+	instruction = (unsigned char) **instructions;
+	if (parse_first_operand(instructions, &nbytes, insns_end))
+		return -1;
+	if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA)
+		return error("Unknown instruction %x", instruction);
+	return copyfrom_data(ctx, data_pos, nbytes);
+}
+
+static int apply_window_in_core(struct window *ctx)
+{
+	const char *instructions;
+	size_t data_pos = 0;
+
+	/*
+	 * Fill ctx->out.buf using data from the source, target,
+	 * and inline data views.
+	 */
+	for (instructions = ctx->instructions.buf;
+	     instructions != ctx->instructions.buf + ctx->instructions.len;
+	     )
+		if (execute_one_instruction(ctx, &instructions, &data_pos))
+			return -1;
+	return 0;
+}
+
+static int apply_one_window(struct line_buffer *delta, off_t *delta_len,
+			    FILE *out)
 {
 	struct window ctx = WINDOW_INIT;
 	size_t out_len;
@@ -130,11 +227,16 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len)
 		goto error_out;
 	if (read_chunk(delta, delta_len, &ctx.instructions, instructions_len))
 		goto error_out;
-	if (instructions_len) {
-		error("What do you think I am?  A delta applier?");
+	if (read_chunk(delta, delta_len, &ctx.data, data_len))
+		goto error_out;
+	strbuf_grow(&ctx.out, out_len);
+	if (apply_window_in_core(&ctx))
+		goto error_out;
+	if (ctx.out.len != out_len) {
+		error("invalid delta: incorrect postimage length");
 		goto error_out;
 	}
-	if (read_chunk(delta, delta_len, &ctx.data, data_len))
+	if (write_strbuf(&ctx.out, out))
 		goto error_out;
 	window_release(&ctx);
 	return 0;
@@ -157,7 +259,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 		if (read_offset(delta, &pre_off, &delta_len) ||
 		    read_length(delta, &pre_len, &delta_len) ||
 		    move_window(preimage, pre_off, pre_len) ||
-		    apply_one_window(delta, &delta_len))
+		    apply_one_window(delta, &delta_len, postimage))
 			return -1;
 	}
 	return 0;
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 12/16] vcs-svn: verify that deltas consume all inline data
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (10 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 11/16] vcs-svn: implement copyfrom_data delta instruction David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 13/16] vcs-svn: let deltas use data from postimage David Barr
                     ` (4 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

By constraining the format of deltas, we can more easily detect
corruption and other breakage.

Requiring deltas not to provide unconsumed data also opens the
possibility of ignoring the declared amount of new data and simply
streaming the data as needed to fulfill copyfrom_data requests.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |    5 ++---
 vcs-svn/svndiff.c |    2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index c6dbb21..7f422ca 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -123,11 +123,10 @@ test_expect_success 'preimage view: reject truncated preimage' '
 	test_must_fail test-svn-fe -d preimage clear.longread 9
 '
 
-test_expect_success 'inline data' '
+test_expect_success 'forbid unconsumed inline data' '
 	printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" |
 		q_to_nul >inline.clear &&
-	test-svn-fe -d preimage inline.clear 18 >actual &&
-	test_cmp empty actual
+	test_must_fail test-svn-fe -d preimage inline.clear 18 >actual
 '
 
 test_expect_success 'reject truncated inline data' '
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 118e8c6..6b505dc 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -208,6 +208,8 @@ static int apply_window_in_core(struct window *ctx)
 	     )
 		if (execute_one_instruction(ctx, &instructions, &data_pos))
 			return -1;
+	if (data_pos != ctx->data.len)
+		return error("invalid delta: does not copy all inline data");
 	return 0;
 }
 
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 13/16] vcs-svn: let deltas use data from postimage
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (11 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 12/16] vcs-svn: verify that deltas consume all inline data David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 14/16] vcs-svn: let deltas use data from preimage David Barr
                     ` (3 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

The copyfrom_target instruction appends data that is already
present in the current output view to the end of output.  (The offset
argument is relative to the beginning of output produced in the
current window.)

The region copied is allowed to run past the end of the existing
output.  To support that case, copy one character at a time rather
than calling memcpy or memmove.  This allows copyfrom_target to be
used once to repeat a string many times.  For example:

	COPYFROM_DATA 2
	COPYFROM_OUTPUT 10, 0
	DATA "ab"

would produce the output "abababababab".

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   42 ++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.c |   28 ++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index 7f422ca..d13115a 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -170,4 +170,46 @@ test_expect_success 'catch attempt to copy missing data' '
 	test_must_fail test-svn-fe -d preimage copy.incomplete $len
 '
 
+test_expect_success 'copyfrom target to repeat data' '
+	printf foofoo >expect &&
+	printf "SVNQ%b%b%s" "QQ\006\004\003" "\0203\0100\003Q" "foo" |
+		q_to_nul >copytarget.repeat &&
+	len=$(wc -c <copytarget.repeat) &&
+	test-svn-fe -d preimage copytarget.repeat $len >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'copyfrom target out of order' '
+	printf foooof >expect &&
+	printf "SVNQ%b%b%s" \
+		"QQ\006\007\003" "\0203\0101\002\0101\001\0101Q" "foo" |
+		q_to_nul >copytarget.reverse &&
+	len=$(wc -c <copytarget.reverse) &&
+	test-svn-fe -d preimage copytarget.reverse $len >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'catch copyfrom future' '
+	printf "SVNQ%b%b%s" "QQ\004\004\003" "\0202\0101\002\0201" "XYZ" |
+		q_to_nul >copytarget.infuture &&
+	len=$(wc -c <copytarget.infuture) &&
+	test_must_fail test-svn-fe -d preimage copytarget.infuture $len
+'
+
+test_expect_success 'copy to sustain' '
+	printf XYXYXYXYXYXZ >expect &&
+	printf "SVNQ%b%b%s" "QQ\014\004\003" "\0202\0111Q\0201" "XYZ" |
+		q_to_nul >copytarget.sustain &&
+	len=$(wc -c <copytarget.sustain) &&
+	test-svn-fe -d preimage copytarget.sustain $len >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'catch copy that overflows' '
+	printf "SVNQ%b%b%s" "QQ\003\003\001" "\0201\0177Q" X |
+		q_to_nul >copytarget.overflow &&
+	len=$(wc -c <copytarget.overflow) &&
+	test_must_fail test-svn-fe -d preimage copytarget.overflow $len
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index 6b505dc..f4c5dae 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -21,7 +21,12 @@
  *   | packed_view_selector int
  *   | packed_copyfrom_data
  *   ;
+ * view_selector ::= copyfrom_source
+ *   | copyfrom_target
+ *   ;
+ * copyfrom_target ::= # binary 01 000000;
  * copyfrom_data ::= # binary 10 000000;
+ * packed_view_selector ::= # view_selector OR-ed with 6 bit value;
  * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value;
  * int ::= highdigit* lowdigit;
  * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value;
@@ -29,6 +34,7 @@
  */
 
 #define INSN_MASK	0xc0
+#define INSN_COPYFROM_TARGET	0x40
 #define INSN_COPYFROM_DATA	0x80
 #define OPERAND_MASK	0x3f
 
@@ -155,6 +161,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len)
 	return 0;
 }
 
+static int copyfrom_target(struct window *ctx, const char **instructions,
+			   size_t nbytes, const char *instructions_end)
+{
+	size_t offset;
+	if (parse_int(instructions, &offset, instructions_end))
+		return -1;
+	if (offset >= ctx->out.len)
+		return error("invalid delta: copies from the future.");
+	for (; nbytes > 0; nbytes--)
+		strbuf_addch(&ctx->out, ctx->out.buf[offset++]);
+	return 0;
+}
+
 static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes)
 {
 	const size_t pos = *data_pos;
@@ -189,9 +208,14 @@ static int execute_one_instruction(struct window *ctx,
 	instruction = (unsigned char) **instructions;
 	if (parse_first_operand(instructions, &nbytes, insns_end))
 		return -1;
-	if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA)
+	switch (instruction & INSN_MASK) {
+	case INSN_COPYFROM_TARGET:
+		return copyfrom_target(ctx, instructions, nbytes, insns_end);
+	case INSN_COPYFROM_DATA:
+		return copyfrom_data(ctx, data_pos, nbytes);
+	default:
 		return error("Unknown instruction %x", instruction);
-	return copyfrom_data(ctx, data_pos, nbytes);
+	}
 }
 
 static int apply_window_in_core(struct window *ctx)
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 14/16] vcs-svn: let deltas use data from preimage
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (12 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 13/16] vcs-svn: let deltas use data from postimage David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 15/16] vcs-svn: microcleanup in svndiff0 window-reading code David Barr
                     ` (2 subsequent siblings)
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

The copyfrom_source instruction appends data from the preimage buffer
to the end of output.  Its arguments are a length and an offset
relative to the beginning of the source view.

With this change, the delta applier is able to reproduce all 5,636,613
blobs in the early history of the ASF repository.  Tested with

	mkfifo backflow
	svn-fe <svn-asf-public-r0:940166 3<backflow |
	git fast-import --cat-blob-fd=3 3>backflow

with svn-asf-public-r0:940166 produced by whatever version of
Subversion the dumps in /dump/ on svn.apache.org use (presumably
1.6.something).

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   35 +++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.c |   28 +++++++++++++++++++++++-----
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index d13115a..45e396f 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -212,4 +212,39 @@ test_expect_success 'catch copy that overflows' '
 	test_must_fail test-svn-fe -d preimage copytarget.overflow $len
 '
 
+test_expect_success 'copyfrom source' '
+	printf foo >expect &&
+	printf "SVNQ%b%b" "Q\003\003\002Q" "\003Q" | q_to_nul >copysource.all &&
+	test-svn-fe -d preimage copysource.all 11 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'copy backwards' '
+	printf oof >expect &&
+	printf "SVNQ%b%b" "Q\003\003\006Q" "\001\002\001\001\001Q" |
+		q_to_nul >copysource.rev &&
+	test-svn-fe -d preimage copysource.rev 15 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'offsets are relative to window' '
+	printf fo >expect &&
+	printf "SVNQ%b%b%b%b" "Q\003\001\002Q" "\001Q" \
+		"\002\001\001\002Q" "\001Q" |
+		q_to_nul >copysource.two &&
+	test-svn-fe -d preimage copysource.two 18 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'example from notes/svndiff' '
+	printf aaaaccccdddddddd >expect &&
+	printf aaaabbbbcccc >source &&
+	printf "SVNQ%b%b%s" "Q\014\020\007\001" \
+		"\004Q\004\010\0201\0107\010" d |
+		q_to_nul >delta.example &&
+	len=$(wc -c <delta.example) &&
+	test-svn-fe -d source delta.example $len >actual &&
+	test_cmp expect actual
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index f4c5dae..b7d40c8 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -24,6 +24,7 @@
  * view_selector ::= copyfrom_source
  *   | copyfrom_target
  *   ;
+ * copyfrom_source ::= # binary 00 000000;
  * copyfrom_target ::= # binary 01 000000;
  * copyfrom_data ::= # binary 10 000000;
  * packed_view_selector ::= # view_selector OR-ed with 6 bit value;
@@ -34,6 +35,7 @@
  */
 
 #define INSN_MASK	0xc0
+#define INSN_COPYFROM_SOURCE	0x00
 #define INSN_COPYFROM_TARGET	0x40
 #define INSN_COPYFROM_DATA	0x80
 #define OPERAND_MASK	0x3f
@@ -43,12 +45,13 @@
 #define VLI_BITS_PER_DIGIT 7
 
 struct window {
+	struct sliding_view *in;
 	struct strbuf out;
 	struct strbuf instructions;
 	struct strbuf data;
 };
 
-#define WINDOW_INIT	{ STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }
+#define WINDOW_INIT(w)	{ w, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }
 
 static void window_release(struct window *ctx)
 {
@@ -161,6 +164,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len)
 	return 0;
 }
 
+static int copyfrom_source(struct window *ctx, const char **instructions,
+			   size_t nbytes, const char *insns_end)
+{
+	size_t offset;
+	if (parse_int(instructions, &offset, insns_end))
+		return -1;
+	if (unsigned_add_overflows(offset, nbytes) ||
+	    offset + nbytes > ctx->in->width)
+		return error("invalid delta: copies source data outside view");
+	strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes);
+	return 0;
+}
+
 static int copyfrom_target(struct window *ctx, const char **instructions,
 			   size_t nbytes, const char *instructions_end)
 {
@@ -209,12 +225,14 @@ static int execute_one_instruction(struct window *ctx,
 	if (parse_first_operand(instructions, &nbytes, insns_end))
 		return -1;
 	switch (instruction & INSN_MASK) {
+	case INSN_COPYFROM_SOURCE:
+		return copyfrom_source(ctx, instructions, nbytes, insns_end);
 	case INSN_COPYFROM_TARGET:
 		return copyfrom_target(ctx, instructions, nbytes, insns_end);
 	case INSN_COPYFROM_DATA:
 		return copyfrom_data(ctx, data_pos, nbytes);
 	default:
-		return error("Unknown instruction %x", instruction);
+		return error("invalid delta: unrecognized instruction");
 	}
 }
 
@@ -238,9 +256,9 @@ static int apply_window_in_core(struct window *ctx)
 }
 
 static int apply_one_window(struct line_buffer *delta, off_t *delta_len,
-			    FILE *out)
+			    struct sliding_view *preimage, FILE *out)
 {
-	struct window ctx = WINDOW_INIT;
+	struct window ctx = WINDOW_INIT(preimage);
 	size_t out_len;
 	size_t instructions_len;
 	size_t data_len;
@@ -285,7 +303,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 		if (read_offset(delta, &pre_off, &delta_len) ||
 		    read_length(delta, &pre_len, &delta_len) ||
 		    move_window(preimage, pre_off, pre_len) ||
-		    apply_one_window(delta, &delta_len, postimage))
+		    apply_one_window(delta, &delta_len, preimage, postimage))
 			return -1;
 	}
 	return 0;
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 15/16] vcs-svn: microcleanup in svndiff0 window-reading code
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (13 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 14/16] vcs-svn: let deltas use data from preimage David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-19  7:20   ` [PATCH 16/16] vcs-svn: implement text-delta handling David Barr
  2011-03-28  7:00   ` vcs-svn: integrate support for text deltas Jonathan Nieder
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

From: Jonathan Nieder <jrnieder@gmail.com>

Combine some ifs.  No functional change intended.

Missed this in "vcs-svn: let deltas use data from preimage"
(2010-10-13).

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndiff.c |    8 +++-----
 1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index b7d40c8..ea727ee 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -267,11 +267,9 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len,
 	/* "source view" offset and length already handled; */
 	if (read_length(delta, &out_len, delta_len) ||
 	    read_length(delta, &instructions_len, delta_len) ||
-	    read_length(delta, &data_len, delta_len))
-		goto error_out;
-	if (read_chunk(delta, delta_len, &ctx.instructions, instructions_len))
-		goto error_out;
-	if (read_chunk(delta, delta_len, &ctx.data, data_len))
+	    read_length(delta, &data_len, delta_len) ||
+	    read_chunk(delta, delta_len, &ctx.instructions, instructions_len) ||
+	    read_chunk(delta, delta_len, &ctx.data, data_len))
 		goto error_out;
 	strbuf_grow(&ctx.out, out_len);
 	if (apply_window_in_core(&ctx))
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 16/16] vcs-svn: implement text-delta handling
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (14 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 15/16] vcs-svn: microcleanup in svndiff0 window-reading code David Barr
@ 2011-03-19  7:20   ` David Barr
  2011-03-28  7:00   ` vcs-svn: integrate support for text deltas Jonathan Nieder
  16 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-19  7:20 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Handle input in Subversion's dumpfile format, version 3.  This is the
format produced by "svnrdump dump" and "svnadmin dump --deltas", and
the main difference between v3 dumpfiles and the dumpfiles already
handled is that these can include nodes whose properties and text are
expressed relative to some other node.

To handle such nodes, we find which node the text and properties are
based on, handle its property changes, use the cat-blob command to
request the basis blob from the fast-import backend, use the
svndiff0_apply() helper to apply the text delta on the fly, writing
output to a temporary file, and then measure that postimage file's
length and write its content to the fast-import stream.

The temporary postimage file is shared between delta-using nodes to
avoid some file system overhead.

The svn-fe interface needs to be more complicated to accommodate the
backward flow of information from the fast-import backend to svn-fe.
The backflow fd is not needed when parsing streams without deltas,
though, so existing scripts using svn-fe on v2 dumps should
continue to work.

NEEDSWORK: generalize interface so caller sets the backflow fd, close
temporary file before exiting

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 contrib/svn-fe/svn-fe.txt |    5 +--
 t/t9010-svn-fe.sh         |  108 +++++++++++++++++++++++++++++++++++++++++++-
 vcs-svn/fast_export.c     |  109 ++++++++++++++++++++++++++++++++++++++++++++-
 vcs-svn/fast_export.h     |    3 +
 vcs-svn/svndump.c         |   13 ++++--
 5 files changed, 227 insertions(+), 11 deletions(-)

diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt
index 85f7b83..2dd27ce 100644
--- a/contrib/svn-fe/svn-fe.txt
+++ b/contrib/svn-fe/svn-fe.txt
@@ -9,7 +9,7 @@ SYNOPSIS
 --------
 [verse]
 mkfifo backchannel &&
-svnadmin dump --incremental REPO |
+svnadmin dump --deltas REPO |
 	svn-fe [url] 3<backchannel |
 	git fast-import --cat-blob-fd=3 3>backchannel
 
@@ -32,9 +32,6 @@ Subversion's repository dump format is documented in full in
 Files in this format can be generated using the 'svnadmin dump' or
 'svk admin dump' command.
 
-Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3)
-are not supported.
-
 OUTPUT FORMAT
 -------------
 The fast-import format is documented by the git-fast-import(1)
diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
index 720fd6b..cd7a215 100755
--- a/t/t9010-svn-fe.sh
+++ b/t/t9010-svn-fe.sh
@@ -568,7 +568,7 @@ test_expect_success PIPE 'change file mode and reiterate content' '
 	test_cmp hello actual.target
 '
 
-test_expect_success PIPE 'deltas not supported' '
+test_expect_success PIPE 'deltas supported' '
 	reinit_git &&
 	{
 		# (old) h + (inline) ello + (old) \n
@@ -629,7 +629,7 @@ test_expect_success PIPE 'deltas not supported' '
 		echo PROPS-END &&
 		cat delta
 	} >delta.dump &&
-	test_must_fail try_dump delta.dump
+	try_dump delta.dump
 '
 
 test_expect_success PIPE 'property deltas supported' '
@@ -836,6 +836,110 @@ test_expect_success PIPE 'deltas for typechange' '
 	test_cmp expect actual
 '
 
+test_expect_success PIPE 'deltas need not consume the whole preimage' '
+	reinit_git &&
+	cat >expect <<-\EOF &&
+	OBJID
+	:120000 100644 OBJID OBJID T	postimage
+	OBJID
+	:100644 120000 OBJID OBJID T	postimage
+	OBJID
+	:000000 100644 OBJID OBJID A	postimage
+	EOF
+	echo "first preimage" >expect.1 &&
+	printf target >expect.2 &&
+	printf lnk >expect.3 &&
+	{
+		printf "SVNQ%b%b%b" "QQ\017\001\017" "\0217" "first preimage\n" |
+		q_to_nul
+	} >delta.1 &&
+	{
+		properties svn:special "*" &&
+		echo PROPS-END
+	} >symlink.props &&
+	{
+		printf "SVNQ%b%b%b" "Q\002\013\004\012" "\0201\001\001\0211" "lnk target" |
+		q_to_nul
+	} >delta.2 &&
+	{
+		printf "SVNQ%b%b" "Q\004\003\004Q" "\001Q\002\002" |
+		q_to_nul
+	} >delta.3 &&
+	{
+		cat <<-\EOF &&
+		SVN-fs-dump-format-version: 3
+
+		Revision-number: 1
+		Prop-content-length: 10
+		Content-length: 10
+
+		PROPS-END
+
+		Node-path: postimage
+		Node-kind: file
+		Node-action: add
+		Text-delta: true
+		Prop-content-length: 10
+		EOF
+		echo Text-content-length: $(wc -c <delta.1) &&
+		echo Content-length: $((10 + $(wc -c <delta.1))) &&
+		echo &&
+		echo PROPS-END &&
+		cat delta.1 &&
+		cat <<-\EOF &&
+
+		Revision-number: 2
+		Prop-content-length: 10
+		Content-length: 10
+
+		PROPS-END
+
+		Node-path: postimage
+		Node-kind: file
+		Node-action: change
+		Text-delta: true
+		EOF
+		echo Prop-content-length: $(wc -c <symlink.props) &&
+		echo Text-content-length: $(wc -c <delta.2) &&
+		echo Content-length: $(($(wc -c <symlink.props) + $(wc -c <delta.2))) &&
+		echo &&
+		cat symlink.props &&
+		cat delta.2 &&
+		cat <<-\EOF &&
+
+		Revision-number: 3
+		Prop-content-length: 10
+		Content-length: 10
+
+		PROPS-END
+
+		Node-path: postimage
+		Node-kind: file
+		Node-action: change
+		Text-delta: true
+		Prop-content-length: 10
+		EOF
+		echo Text-content-length: $(wc -c <delta.3) &&
+		echo Content-length: $((10 + $(wc -c <delta.3))) &&
+		echo &&
+		echo PROPS-END &&
+		cat delta.3 &&
+		echo
+	} >deltapartial.dump &&
+	try_dump deltapartial.dump &&
+	{
+		git rev-list HEAD |
+		git diff-tree --root --stdin |
+		sed "s/$_x40/OBJID/g"
+	} >actual &&
+	test_cmp expect actual &&
+	git show HEAD:postimage >actual.3 &&
+	git show HEAD^:postimage >actual.2 &&
+	git show HEAD^^:postimage >actual.1 &&
+	test_cmp expect.1 actual.1 &&
+	test_cmp expect.2 actual.2 &&
+	test_cmp expect.3 actual.3
+'
 
 test_expect_success 'set up svn repo' '
 	svnconf=$PWD/svnconf &&
diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index 4748253..94f12dc 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -7,15 +7,38 @@
 #include "strbuf.h"
 #include "quote.h"
 #include "fast_export.h"
-#include "line_buffer.h"
 #include "repo_tree.h"
 #include "strbuf.h"
+#include "svndiff.h"
+#include "sliding_window.h"
+#include "line_buffer.h"
 
 #define MAX_GITSVN_LINE_LEN 4096
+#define REPORT_FILENO 3
 
 static uint32_t first_commit_done;
+static struct line_buffer postimage = LINE_BUFFER_INIT;
 static struct line_buffer report_buffer = LINE_BUFFER_INIT;
 
+/* NEEDSWORK: move to fast_export_init() */
+static int init_postimage(void)
+{
+	static int postimage_initialized;
+	if (postimage_initialized)
+		return 0;
+	postimage_initialized = 1;
+	return buffer_tmpfile_init(&postimage);
+}
+
+static int init_report_buffer(int fd)
+{
+	static int report_buffer_initialized;
+	if (report_buffer_initialized)
+		return 0;
+	report_buffer_initialized = 1;
+	return buffer_fdinit(&report_buffer, fd);
+}
+
 void fast_export_init(int fd)
 {
 	if (buffer_fdinit(&report_buffer, fd))
@@ -127,6 +150,73 @@ static void die_short_read(struct line_buffer *input)
 	die("invalid dump: unexpected end of file");
 }
 
+static int ends_with(const char *s, size_t len, const char *suffix)
+{
+	const size_t suffixlen = strlen(suffix);
+	if (len < suffixlen)
+		return 0;
+	return !memcmp(s + len - suffixlen, suffix, suffixlen);
+}
+
+static int parse_cat_response_line(const char *header, off_t *len)
+{
+	size_t headerlen = strlen(header);
+	const char *type;
+	const char *end;
+
+	if (ends_with(header, headerlen, " missing"))
+		return error("cat-blob reports missing blob: %s", header);
+	type = memmem(header, headerlen, " blob ", strlen(" blob "));
+	if (!type)
+		return error("cat-blob header has wrong object type: %s", header);
+	*len = strtoumax(type + strlen(" blob "), (char **) &end, 10);
+	if (end == type + strlen(" blob "))
+		return error("cat-blob header does not contain length: %s", header);
+	if (*end)
+		return error("cat-blob header contains garbage after length: %s", header);
+	return 0;
+}
+
+static long apply_delta(off_t len, struct line_buffer *input,
+			const char *old_data, uint32_t old_mode)
+{
+	long ret;
+	off_t preimage_len = 0;
+	struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer);
+	FILE *out;
+
+	if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage)))
+		die("cannot open temporary file for blob retrieval");
+	if (init_report_buffer(REPORT_FILENO))
+		die("cannot open fd 3 for feedback from fast-import");
+	if (old_data) {
+		const char *response;
+		printf("cat-blob %s\n", old_data);
+		fflush(stdout);
+		response = get_response_line();
+		if (parse_cat_response_line(response, &preimage_len))
+			die("invalid cat-blob response: %s", response);
+	}
+	if (old_mode == REPO_MODE_LNK) {
+		strbuf_addstr(&preimage.buf, "link ");
+		preimage_len += strlen("link ");
+	}
+	if (svndiff0_apply(input, len, &preimage, out))
+		die("cannot apply delta");
+	if (old_data) {
+		/* Read the remainder of preimage and trailing newline. */
+		if (move_window(&preimage, preimage_len, 1))
+			die("cannot seek to end of input");
+		if (preimage.buf.buf[0] != '\n')
+			die("missing newline after cat-blob response");
+	}
+	ret = buffer_tmpfile_prepare_to_read(&postimage);
+	if (ret < 0)
+		die("cannot read temporary file for blob retrieval");
+	strbuf_release(&preimage.buf);
+	return ret;
+}
+
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
 {
 	if (mode == REPO_MODE_LNK) {
@@ -194,3 +284,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
 	ls_from_active_commit(path);
 	return parse_ls_response(get_response_line(), mode, dataref);
 }
+
+void fast_export_blob_delta(uint32_t mode,
+				uint32_t old_mode, const char *old_data,
+				uint32_t len, struct line_buffer *input)
+{
+	long postimage_len;
+	if (len > maximum_signed_value_of_type(off_t))
+		die("enormous delta");
+	postimage_len = apply_delta((off_t) len, input, old_data, old_mode);
+	if (mode == REPO_MODE_LNK) {
+		buffer_skip_bytes(&postimage, strlen("link "));
+		postimage_len -= strlen("link ");
+	}
+	printf("data %ld\n", postimage_len);
+	buffer_copy_bytes(&postimage, postimage_len);
+	fputc('\n', stdout);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index bc5bddf..a01774c 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author,
 			unsigned long timestamp);
 void fast_export_end_commit(uint32_t revision);
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
+void fast_export_blob_delta(uint32_t mode,
+			uint32_t old_mode, const char *old_data,
+			uint32_t len, struct line_buffer *input);
 
 /* If there is no such file at that rev, returns -1, errno == ENOENT. */
 int fast_export_ls_rev(uint32_t rev, const char *path,
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index dbb9c16..0c7ac64 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -206,9 +206,7 @@ static void handle_node(void)
 	 */
 	static const char *const empty_blob = "::empty::";
 	const char *old_data = NULL;
-
-	if (node_ctx.text_delta)
-		die("text deltas not supported");
+	uint32_t old_mode = REPO_MODE_BLB;
 
 	if (node_ctx.action == NODEACT_DELETE) {
 		if (have_text || have_props || node_ctx.srcRev)
@@ -243,6 +241,7 @@ static void handle_node(void)
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
 			die("invalid dump: cannot modify a file into a directory");
 		node_ctx.type = mode;
+		old_mode = mode;
 	} else if (node_ctx.action == NODEACT_ADD) {
 		if (type == REPO_MODE_DIR)
 			old_data = NULL;
@@ -277,8 +276,14 @@ static void handle_node(void)
 		fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
 		return;
 	}
+	if (!node_ctx.text_delta) {
+		fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
+		fast_export_data(node_ctx.type, node_ctx.textLength, &input);
+		return;
+	}
 	fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
-	fast_export_data(node_ctx.type, node_ctx.textLength, &input);
+	fast_export_blob_delta(node_ctx.type, old_mode, old_data,
+				node_ctx.textLength, &input);
 }
 
 static void begin_revision(void)
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* Re: [PATCH 1/9] vcs-svn: pass paths through to fast-import
  2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
@ 2011-03-19  7:50   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  7:50 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

Hi,

David Barr wrote:

>  vcs-svn/fast_export.c |   47 ++++++++++++++++++------------------
>  vcs-svn/fast_export.h |    9 +++----
>  vcs-svn/repo_tree.c   |   20 +++++++-------
>  vcs-svn/repo_tree.h   |   13 ++++------
>  vcs-svn/svndump.c     |   63 +++++++++++++++++++++----------------------------
>  5 files changed, 70 insertions(+), 82 deletions(-)

Hoorah!  Simpler and more idiomatic.

> +++ b/vcs-svn/fast_export.c
> @@ -32,30 +34,30 @@ void fast_export_reset(void)
[...]
>  	buffer_reset(&report_buffer);
>  }
>  
> -void fast_export_delete(uint32_t depth, const uint32_t *path)
> +void fast_export_delete(const char *path)
>  {
> -	printf("D \"");
> -	pool_print_seq_q(depth, path, '/', stdout);
> -	printf("\"\n");
> +	putchar('D');
> +	putchar(' ');
> +	quote_c_style(path, NULL, stdout, 0);
> +	putchar('\n');
>  }

Functional change: if the path doesn't need quoting, this won't
surround it with quotation marks.  Luckily fast-import doesn't
mind.

[...]
> -	printf("M %06"PRIo32" %s \"", mode, dataref);
> -	pool_print_seq_q(depth, path, '/', stdout);
> -	printf("\"\n");
> +	printf("M %06"PRIo32" %s ", mode, dataref);
> +	quote_c_style(path, NULL, stdout, 0);
> +	putchar('\n');
[...]
> -	printf("ls :%"PRIu32" \"", rev);
> -	pool_print_seq_q(depth, path, '/', stdout);
> -	printf("\"\n");
> +	printf("ls :%"PRIu32" ", rev);
> +	quote_c_style(path, NULL, stdout, 0);
> +	putchar('\n');

Likewise.

[...]
> -static void ls_from_active_commit(uint32_t depth, const uint32_t *path)
> +static void ls_from_active_commit(const char *path)
>  {
>  	/* ls "path/to/file" */
>  	printf("ls \"");
> -	pool_print_seq_q(depth, path, '/', stdout);
> +	quote_c_style(path, NULL, stdout, 1);
>  	printf("\"\n");

Single-argument 'ls': quotes always present.  Phew.

[...]
> --- a/vcs-svn/repo_tree.h
> +++ b/vcs-svn/repo_tree.h
> @@ -8,15 +8,12 @@
>  #define REPO_MODE_EXE 0100755
>  #define REPO_MODE_LNK 0120000
>  
> -#define REPO_MAX_PATH_LEN 4096
> -#define REPO_MAX_PATH_DEPTH 1000

Yes.

> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -11,8 +11,8 @@
>  #include "repo_tree.h"
>  #include "fast_export.h"
>  #include "line_buffer.h"
> -#include "obj_pool.h"
>  #include "string_pool.h"
> +#include "strbuf.h"
>  
>  #define REPORT_FILENO 3
>  
> @@ -31,32 +31,20 @@
>  #define LENGTH_UNKNOWN (~0)
>  #define DATE_RFC2822_LEN 31
>  
> -/* Create memory pool for log messages */
> -obj_pool_gen(log, char, 4096)
> -

Not a path. :)  Snuck in from a separate patch?

>  static struct line_buffer input = LINE_BUFFER_INIT;
>  
>  #define REPORT_FILENO 3
>  
> -static char *log_copy(uint32_t length, const char *log)
> -{
[...]
> -}

Likewise.

[...]
>  static struct {
>  	uint32_t revision, author;
>  	unsigned long timestamp;
> -	char *log;
> +	struct strbuf log;
>  } rev_ctx;

Likewise.

[... etc ...]
> @@ -406,6 +395,9 @@ int svndump_init(const char *filename)
>  	if (buffer_init(&input, filename))
>  		return error("cannot open %s: %s", filename, strerror(errno));
>  	fast_export_init(REPORT_FILENO);
> +	strbuf_init(&rev_ctx.log, 4096);
> +	strbuf_init(&node_ctx.src, 4096);
> +	strbuf_init(&node_ctx.dst, 4096);

4096 because PATH_MAX or some other reason?

> @@ -415,11 +407,13 @@ int svndump_init(const char *filename)
>
>  void svndump_deinit(void)
>  {
> -	log_reset();
>  	fast_export_deinit();
>  	reset_dump_ctx(~0);
>  	reset_rev_ctx(0);
>  	reset_node_ctx(NULL);
> +	strbuf_release(&rev_ctx.log);
> +	strbuf_release(&node_ctx.src);
> +	strbuf_release(&node_ctx.dst);

Side note: it's often not clear what should go in the "prepare for next
user" routine and what should go in the "shutting down for good"
routine.  I suppose these should use strbuf_reset and the memory would
be finally freed in svndump_reset?  Does it make sense to have two
distinct routines like this without a user to demonstrate the trade-offs?

Except as noted above,
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

Thanks; I like where this is going.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 2/9] vcs-svn: avoid using ls command twice
  2011-03-19  7:03 ` [PATCH 2/9] vcs-svn: avoid using ls command twice David Barr
@ 2011-03-19  8:01   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  8:01 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

David Barr wrote:

>  3 files changed, 6 insertions(+), 24 deletions(-)

:)

> --- a/vcs-svn/repo_tree.c
> +++ b/vcs-svn/repo_tree.c
> @@ -8,39 +8,23 @@
>  #include "repo_tree.h"
>  #include "fast_export.h"
>  
> -const char *repo_read_path(const char *path)
> +const char *repo_read_path(const char *path, uint32_t *mode_out)

Obviously good, both because it means we can use one round-trip
to get the same information that previously took two and because
the API just makes more sense.  Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys
  2011-03-19  7:03 ` [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys David Barr
@ 2011-03-19  8:51   ` Jonathan Nieder
  2011-03-21  1:26     ` [PATCH 1/3] " David Barr
  0 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  8:51 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

David Barr wrote:

>  vcs-svn/svndump.c |   50 ++++++++++++++++++++++++++++++++------------------
>  1 files changed, 32 insertions(+), 18 deletions(-)

Alas.  But it's probably worth it for the chance to get rid of
knowledge of how to intern strings.

> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
[...]
> @@ -113,22 +107,38 @@ static void init_keys(void)
>  	keys.prop_delta = pool_intern("Prop-delta");
>  }
>  
> -static void handle_property(uint32_t key, const char *val, uint32_t len,
> +static void handle_property(const char *key, const char *val, uint32_t len,
>  				uint32_t *type_set)
>  {
> -	if (key == keys.svn_log) {
> +	const int key_len = strlen(key);
> +	switch (key_len) {
> +	case 7:
> +		if (memcmp(key, "svn:log", 7))
> +			break;

Crazy idea: to make it visible at a glance when the numbers are wrong,
one can do:

	switch (key_len + 1) {
	case sizeof("svn:log"):
		if (memcmp(key, "svn:log", strlen("svn:log")))
			break;

This only makes the redundancy more obvious, of course.  It could
be reduced a little with something like

 static int prefixcmp_len(const char *str, size_t str_len,
			  const char *prefix, size_t prefix_len)
 {
	if (prefix_len > str_len)
		return 1;
	return memcmp(str, prefix, prefix_len);
 }

but that's probably not worth the cognitive load.

[...]
> -	} else if (key == keys.svn_executable || key == keys.svn_special) {
> +		break;
> +	case 14:
> +		if (memcmp(key, "svn:executable", 14))
> +			break;
> +	case 11:
> +		if (key_len == 11 && memcmp(key, "svn:special", 11))
> +			break;

Maybe, to avoid an unnecessary /* fall through */:

	case sizeof("svn:executable"):
	case sizeof("svn:special"):
		if (key_len == strlen("svn:executable") &&
		    memcmp(key, "svn:executable", strlen(...)))
			break;
		if (key_len == strlen("svn:special") &&
		    memcmp(key, "svn:special", strlen("svn:special")))
			break;

>  		if (*type_set) {
[...]
> @@ -147,7 +157,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
>  
>  static void read_props(void)
>  {
> -	uint32_t key = ~0;
> +	char key[16] = {0};

Probably warrants a comment:

	/* the longest key we pay attention to is "<whatever>" */

>  	const char *t;
>  	/*
>  	 * NEEDSWORK: to support simple mode changes like
> @@ -175,16 +185,20 @@ static void read_props(void)
>  
>  		switch (type) {
>  		case 'K':
> -			key = pool_intern(val);
> -			continue;
>  		case 'D':
> -			key = pool_intern(val);
> +			if (len < sizeof(key))
> +				memcpy(key, val, len + 1);

What happens on I/O error, when val is NULL?  How about early EOF
or malformed input, when strlen(val) < len?

Some tests would also be a comfort.

I'm not so happy with the table of (at first glance) magic-seeming
numbers, and the error handling looks a little tricky, but aside from
those details this seems like a reasonable way to avoid some
complication without sacrificing speed.

Speaking of which, any hints for people who want to time this patch
(and other patches in the series)?

Thanks.
Jonathan

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys
  2011-03-19  7:03 ` [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys David Barr
@ 2011-03-19  8:57   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  8:57 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

David Barr wrote:

> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -323,16 +296,25 @@ void svndump_read(const char *url)
>  			continue;
>  		*val++ = '\0';
>  		*val++ = '\0';
> -		key = pool_intern(t);
>  
> -		if (key == keys.svn_fs_dump_format_version) {
> +		/* strlen(key) */
> +		switch (val - t - 2) { 
> +		case 26:
> +			if (memcmp(t, "SVN-fs-dump-format-version", 26))
> +				continue;

Same comments as the previous patch apply here.

Might make sense to split out the loop body (or at least the giant
switch statement) as a separate function for easier contemplation.

[...]
> -		} else if (key == keys.content_length) {
> +			break;
> +		case 14:
> +			if (memcmp(t, "Content-length", 14))
> +				continue;
>  			len = atoi(val);

Thanks for a very clean patch.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 5/9] vcs-svn: factor out usage of string_pool
  2011-03-19  7:03 ` [PATCH 5/9] vcs-svn: factor out usage of string_pool David Barr
@ 2011-03-19  9:08   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  9:08 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

David Barr wrote:

> [Subject: vcs-svn: factor out usage of string_pool]

This actually means something like: use strbufs and strings instead of
interned strings for values of rev, dump, and node fields that happen
to be strings.  After this change, there are no more users of the
string-pool library left.

> @@ -71,14 +71,16 @@ static void reset_rev_ctx(uint32_t revision)
>  	rev_ctx.revision = revision;
>  	rev_ctx.timestamp = 0;
>  	strbuf_reset(&rev_ctx.log);
> -	rev_ctx.author = ~0;
> +	strbuf_reset(&rev_ctx.author);

Side note: should the default timestamp really be the epoch?  I'd
rather the default timestamp be the timestamp of the parent revision,
to make out-of-order dates a little less likely.

>  }
>  
> -static void reset_dump_ctx(uint32_t url)
> +static void reset_dump_ctx(const char *url)
>  {
> -	dump_ctx.url = url;
> +	strbuf_reset(&dump_ctx.url);
> +	if (url)
> +		strbuf_addstr(&dump_ctx.url, url);

Good, we keep our own copy of the url still.

> @@ -91,13 +93,15 @@ static void handle_property(const char *key, const char *val, uint32_t len,
>  			break;
>  		if (!val)
>  			die("invalid dump: unsets svn:log");
> -		/* Value length excludes terminating nul. */
> -		strbuf_add(&rev_ctx.log, val, len + 1);
> +		strbuf_reset(&rev_ctx.log);
> +		strbuf_add(&rev_ctx.log, val, len);

What is this change about?

> @@ -447,5 +456,4 @@ void svndump_reset(void)
>  {
>  	fast_export_reset();
>  	buffer_reset(&input);
> -	pool_reset();

strbuf_release(&dump_ctx.url)?

Likewise for dump_ctx.uuid and the other one.

Thanks; except as noted above this looks good.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter
  2011-03-19  7:03 ` [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter David Barr
@ 2011-03-19  9:10   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-19  9:10 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

David Barr wrote:

> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -296,10 +296,12 @@ void svndump_read(const char *url)
>  
>  	reset_dump_ctx(url);
>  	while ((t = buffer_read_line(&input))) {
> -		val = strstr(t, ": ");
> +		val = strchr(t, ':');
>  		if (!val)
>  			continue;
>  		*val++ = '\0';
> +		if (*val != ' ')
> +			continue;

This one and the three preceding it (removing sublibraries)
make sense.  Thanks for a pleasant read.

Good night,
Jonathan

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH 1/3] vcs-svn: implement perfect hash for node-prop keys
  2011-03-19  8:51   ` Jonathan Nieder
@ 2011-03-21  1:26     ` David Barr
  2011-03-21  1:26       ` [PATCH 2/3] vcs-svn: implement perfect hash for top-level keys David Barr
  2011-03-21  1:26       ` [PATCH 3/3] vcs-svn: use switch rather than cascading ifs David Barr
  0 siblings, 2 replies; 72+ messages in thread
From: David Barr @ 2011-03-21  1:26 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |   64 +++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 15b173e..48848ff 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -52,8 +52,7 @@ static struct {
 } dump_ctx;
 
 static struct {
-	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
-		revision_number, node_path, node_kind, node_action,
+	uint32_t uuid, revision_number, node_path, node_kind, node_action,
 		node_copyfrom_path, node_copyfrom_rev, text_content_length,
 		prop_content_length, content_length, svn_fs_dump_format_version,
 		/* version 3 format */
@@ -92,11 +91,6 @@ static void reset_dump_ctx(uint32_t url)
 
 static void init_keys(void)
 {
-	keys.svn_log = pool_intern("svn:log");
-	keys.svn_author = pool_intern("svn:author");
-	keys.svn_date = pool_intern("svn:date");
-	keys.svn_executable = pool_intern("svn:executable");
-	keys.svn_special = pool_intern("svn:special");
 	keys.uuid = pool_intern("UUID");
 	keys.revision_number = pool_intern("Revision-number");
 	keys.node_path = pool_intern("Node-path");
@@ -113,22 +107,43 @@ static void init_keys(void)
 	keys.prop_delta = pool_intern("Prop-delta");
 }
 
-static void handle_property(uint32_t key, const char *val, uint32_t len,
+/* Compare string to literal of equal length; must be guarded by length test. */
+#define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
+
+static void handle_property(const char *key, const int sizeof_key,
+				const char *val, uint32_t len,
 				uint32_t *type_set)
 {
-	if (key == keys.svn_log) {
+	switch (sizeof_key) {
+	case sizeof("svn:log"):
+		if (constcmp(key, "svn:log"))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:log");
 		/* Value length excludes terminating nul. */
 		strbuf_add(&rev_ctx.log, val, len + 1);
-	} else if (key == keys.svn_author) {
+		break;
+	case sizeof("svn:author"):
+		if (constcmp(key, "svn:author"))
+			break;
 		rev_ctx.author = pool_intern(val);
-	} else if (key == keys.svn_date) {
+		break;
+	case sizeof("svn:date"):
+		if (constcmp(key, "svn:date"))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:date");
 		if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
 			warning("invalid timestamp: %s", val);
-	} else if (key == keys.svn_executable || key == keys.svn_special) {
+		break;
+	case sizeof("svn:executable"):
+	case sizeof("svn:special"):
+		if (sizeof_key == sizeof("svn:executable") &&
+		    constcmp(key, "svn:executable"))
+			break;
+		if (sizeof_key == sizeof("svn:special") &&
+		    constcmp(key, "svn:special"))
+			break;
 		if (*type_set) {
 			if (!val)
 				return;
@@ -139,7 +154,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 			return;
 		}
 		*type_set = 1;
-		node_ctx.type = key == keys.svn_executable ?
+		node_ctx.type = sizeof_key == sizeof("svn:executable") ?
 				REPO_MODE_EXE :
 				REPO_MODE_LNK;
 	}
@@ -147,7 +162,9 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 
 static void read_props(void)
 {
-	uint32_t key = ~0;
+	/* Reserve enough space for the longest standard key. */
+	char key[sizeof("svn:executable")] = {0};
+	int sizeof_key = 1;
 	const char *t;
 	/*
 	 * NEEDSWORK: to support simple mode changes like
@@ -175,16 +192,25 @@ static void read_props(void)
 
 		switch (type) {
 		case 'K':
-			key = pool_intern(val);
-			continue;
 		case 'D':
-			key = pool_intern(val);
+			if (len < sizeof(key) && val) {
+				memcpy(key, val, len + 1);
+				sizeof_key = len + 1;
+			} else {
+				/* nonstandard key. */
+				*key = '\0';
+				sizeof_key = 1;
+			}
+			if (type == 'K')
+				continue;
+			assert(type == 'D');
 			val = NULL;
 			len = 0;
 			/* fall through */
 		case 'V':
-			handle_property(key, val, len, &type_set);
-			key = ~0;
+			handle_property(key, sizeof_key, val, len, &type_set);
+			*key = '\0';
+			sizeof_key = 1;
 			continue;
 		default:
 			die("invalid property line: %s\n", t);
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 2/3] vcs-svn: implement perfect hash for top-level keys
  2011-03-21  1:26     ` [PATCH 1/3] " David Barr
@ 2011-03-21  1:26       ` David Barr
  2011-03-21  1:26       ` [PATCH 3/3] vcs-svn: use switch rather than cascading ifs David Barr
  1 sibling, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21  1:26 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |  110 +++++++++++++++++++++++++++++------------------------
 1 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 48848ff..f03e8cf 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -51,14 +51,6 @@ static struct {
 	uint32_t version, uuid, url;
 } dump_ctx;
 
-static struct {
-	uint32_t uuid, revision_number, node_path, node_kind, node_action,
-		node_copyfrom_path, node_copyfrom_rev, text_content_length,
-		prop_content_length, content_length, svn_fs_dump_format_version,
-		/* version 3 format */
-		text_delta, prop_delta;
-} keys;
-
 static void reset_node_ctx(char *fname)
 {
 	node_ctx.type = 0;
@@ -89,24 +81,6 @@ static void reset_dump_ctx(uint32_t url)
 	dump_ctx.uuid = ~0;
 }
 
-static void init_keys(void)
-{
-	keys.uuid = pool_intern("UUID");
-	keys.revision_number = pool_intern("Revision-number");
-	keys.node_path = pool_intern("Node-path");
-	keys.node_kind = pool_intern("Node-kind");
-	keys.node_action = pool_intern("Node-action");
-	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
-	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
-	keys.text_content_length = pool_intern("Text-content-length");
-	keys.prop_content_length = pool_intern("Prop-content-length");
-	keys.content_length = pool_intern("Content-length");
-	keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version");
-	/* version 3 format (Subversion 1.1.0) */
-	keys.text_delta = pool_intern("Text-delta");
-	keys.prop_delta = pool_intern("Prop-delta");
-}
-
 /* Compare string to literal of equal length; must be guarded by length test. */
 #define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
 
@@ -326,7 +300,6 @@ void svndump_read(const char *url)
 	char *t;
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
-	uint32_t key;
 
 	reset_dump_ctx(pool_intern(url));
 	while ((t = buffer_read_line(&input))) {
@@ -335,16 +308,25 @@ void svndump_read(const char *url)
 			continue;
 		*val++ = '\0';
 		*val++ = '\0';
-		key = pool_intern(t);
 
-		if (key == keys.svn_fs_dump_format_version) {
+		/* strlen(key) + 1 */
+		switch (val - t - 1) {
+		case sizeof("SVN-fs-dump-format-version"):
+			if (constcmp(t, "SVN-fs-dump-format-version"))
+				continue;
 			dump_ctx.version = atoi(val);
 			if (dump_ctx.version > 3)
 				die("expected svn dump format version <= 3, found %"PRIu32,
 				    dump_ctx.version);
-		} else if (key == keys.uuid) {
+			break;
+		case sizeof("UUID"):
+			if (constcmp(t, "UUID"))
+				continue;
 			dump_ctx.uuid = pool_intern(val);
-		} else if (key == keys.revision_number) {
+			break;
+		case sizeof("Revision-number"):
+			if (constcmp(t, "Revision-number"))
+				continue;
 			if (active_ctx == NODE_CTX)
 				handle_node();
 			if (active_ctx == REV_CTX)
@@ -353,21 +335,31 @@ void svndump_read(const char *url)
 				end_revision();
 			active_ctx = REV_CTX;
 			reset_rev_ctx(atoi(val));
-		} else if (key == keys.node_path) {
-			if (active_ctx == NODE_CTX)
-				handle_node();
-			if (active_ctx == REV_CTX)
-				begin_revision();
-			active_ctx = NODE_CTX;
-			reset_node_ctx(val);
-		} else if (key == keys.node_kind) {
+			break;
+		case sizeof("Node-path"):
+			if (prefixcmp(t, "Node-"))
+				continue;
+			if (!constcmp(t + strlen("Node-"), "path")) {
+				if (active_ctx == NODE_CTX)
+					handle_node();
+				if (active_ctx == REV_CTX)
+					begin_revision();
+				active_ctx = NODE_CTX;
+				reset_node_ctx(val);
+				break;
+			}
+			if (constcmp(t + strlen("Node-"), "kind"))
+				continue;
 			if (!strcmp(val, "dir"))
 				node_ctx.type = REPO_MODE_DIR;
 			else if (!strcmp(val, "file"))
 				node_ctx.type = REPO_MODE_BLB;
 			else
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
-		} else if (key == keys.node_action) {
+			break;
+		case sizeof("Node-action"):
+			if (constcmp(t, "Node-action"))
+				continue;
 			if (!strcmp(val, "delete")) {
 				node_ctx.action = NODEACT_DELETE;
 			} else if (!strcmp(val, "add")) {
@@ -380,20 +372,39 @@ void svndump_read(const char *url)
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-		} else if (key == keys.node_copyfrom_path) {
+			break;
+		case sizeof("Node-copyfrom-path"):
+			if (constcmp(t, "Node-copyfrom-path"))
+				continue;
 			strbuf_reset(&node_ctx.src);
 			strbuf_addstr(&node_ctx.src, val);
-		} else if (key == keys.node_copyfrom_rev) {
+			break;
+		case sizeof("Node-copyfrom-rev"):
+			if (constcmp(t, "Node-copyfrom-rev"))
+				continue;
 			node_ctx.srcRev = atoi(val);
-		} else if (key == keys.text_content_length) {
-			node_ctx.textLength = atoi(val);
-		} else if (key == keys.prop_content_length) {
+			break;
+		case sizeof("Text-content-length"):
+			if (!constcmp(t, "Text-content-length")) {
+				node_ctx.textLength = atoi(val);
+				break;
+			}
+			if (constcmp(t, "Prop-content-length"))
+				continue;
 			node_ctx.propLength = atoi(val);
-		} else if (key == keys.text_delta) {
-			node_ctx.text_delta = !strcmp(val, "true");
-		} else if (key == keys.prop_delta) {
+			break;
+		case sizeof("Text-delta"):
+			if (!constcmp(t, "Text-delta")) {
+				node_ctx.text_delta = !strcmp(val, "true");
+				break;
+			}
+			if (constcmp(t, "Prop-delta"))
+				continue;
 			node_ctx.prop_delta = !strcmp(val, "true");
-		} else if (key == keys.content_length) {
+			break;
+		case sizeof("Content-length"):
+			if (constcmp(t, "Content-length"))
+				continue;
 			len = atoi(val);
 			buffer_read_line(&input);
 			if (active_ctx == REV_CTX) {
@@ -426,7 +437,6 @@ int svndump_init(const char *filename)
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
-	init_keys();
 	return 0;
 }
 
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 3/3] vcs-svn: use switch rather than cascading ifs
  2011-03-21  1:26     ` [PATCH 1/3] " David Barr
  2011-03-21  1:26       ` [PATCH 2/3] vcs-svn: implement perfect hash for top-level keys David Barr
@ 2011-03-21  1:26       ` David Barr
  2011-03-21  1:38         ` [PATCHv2] " David Barr
  1 sibling, 1 reply; 72+ messages in thread
From: David Barr @ 2011-03-21  1:26 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

In the spirit of the last two changes:
Switch on length and use constcmp for parsing headers with restricted values.

Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |   38 +++++++++++++++++++++++++++++---------
 1 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index f03e8cf..fe14ce2 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -350,25 +350,45 @@ void svndump_read(const char *url)
 			}
 			if (constcmp(t + strlen("Node-"), "kind"))
 				continue;
-			if (!strcmp(val, "dir"))
+			switch (strlen(val) + 1) {
+			case sizeof("dir"):
+				if (constcmp(val, "dir"))
+					break;
 				node_ctx.type = REPO_MODE_DIR;
-			else if (!strcmp(val, "file"))
+				break;
+			case sizeof("file"):
+				if (constcmp(val, "file"))
+					break;
 				node_ctx.type = REPO_MODE_BLB;
-			else
+				break;
+			default:
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
 			break;
 		case sizeof("Node-action"):
 			if (constcmp(t, "Node-action"))
 				continue;
-			if (!strcmp(val, "delete")) {
-				node_ctx.action = NODEACT_DELETE;
-			} else if (!strcmp(val, "add")) {
+			switch (strlen(val) + 1) {
+			case sizeof("add"):
+				if (constcmp(val, "add"))
+					break;
 				node_ctx.action = NODEACT_ADD;
-			} else if (!strcmp(val, "change")) {
+				break;
+			case sizeof("change"):
+				if (constcmp(val, "change"))
+					break;
 				node_ctx.action = NODEACT_CHANGE;
-			} else if (!strcmp(val, "replace")) {
+				break;
+			case sizeof("delete"):
+				if (!constcmp(val, "delete")) {
+					node_ctx.action = NODEACT_DELETE;
+					break;
+				}
+				if (constcmp(val, "replace"))
+					break;
 				node_ctx.action = NODEACT_REPLACE;
-			} else {
+				break;
+			default:
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCHv2] vcs-svn: use switch rather than cascading ifs
  2011-03-21  1:26       ` [PATCH 3/3] vcs-svn: use switch rather than cascading ifs David Barr
@ 2011-03-21  1:38         ` David Barr
  0 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21  1:38 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

In the spirit of the last two changes:
Switch on length and use constcmp for parsing headers with restricted values.

Signed-off-by: David Barr <david.barr@cordelta.com>
---

 Silly me, the last version didn't even compile.

 vcs-svn/svndump.c |   39 +++++++++++++++++++++++++++++----------
 1 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index f03e8cf..ccf2520 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -350,25 +350,44 @@ void svndump_read(const char *url)
 			}
 			if (constcmp(t + strlen("Node-"), "kind"))
 				continue;
-			if (!strcmp(val, "dir"))
+			switch (strlen(val) + 1) {
+			case sizeof("dir"):
+				if (constcmp(val, "dir"))
+					break;
 				node_ctx.type = REPO_MODE_DIR;
-			else if (!strcmp(val, "file"))
+				break;
+			case sizeof("file"):
+				if (constcmp(val, "file"))
+					break;
 				node_ctx.type = REPO_MODE_BLB;
-			else
+				break;
+			default:
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
 			break;
 		case sizeof("Node-action"):
 			if (constcmp(t, "Node-action"))
 				continue;
-			if (!strcmp(val, "delete")) {
-				node_ctx.action = NODEACT_DELETE;
-			} else if (!strcmp(val, "add")) {
+			switch (strlen(val) + 1) {
+			case sizeof("add"):
+				if (constcmp(val, "add"))
+					break;
 				node_ctx.action = NODEACT_ADD;
-			} else if (!strcmp(val, "change")) {
-				node_ctx.action = NODEACT_CHANGE;
-			} else if (!strcmp(val, "replace")) {
+				break;
+			case sizeof("change"):
+				if (!constcmp(val, "change")) {
+					node_ctx.action = NODEACT_CHANGE;
+					break;
+				}
+				if (!constcmp(val, "delete")) {
+					node_ctx.action = NODEACT_DELETE;
+					break;
+				}
+				if (constcmp(val, "replace"))
+					break;
 				node_ctx.action = NODEACT_REPLACE;
-			} else {
+				break;
+			default:
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code
  2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
                   ` (9 preceding siblings ...)
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
@ 2011-03-21 23:49 ` David Barr
  2011-03-21 23:49   ` [PATCH 01/11] vcs-svn: use strbuf for revision log David Barr
                     ` (11 more replies)
  10 siblings, 12 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky

Thanks Jonathan for reviewing the series and for suggesting
a good compromise between readability and performance.

The first patch of the previous version has been split into two
(now patches 1 and 2). Patch 6 follows the spirit of patches 4 and 5,
for a consistent approach to switching on constant strings.

 .gitignore              |    3 -
 Makefile                |   13 +--
 t/t0080-vcs-svn.sh      |  117 ------------------
 test-obj-pool.c         |  116 ------------------
 test-string-pool.c      |   31 -----
 test-treap.c            |   70 -----------
 vcs-svn/LICENSE         |    3 -
 vcs-svn/fast_export.c   |   64 +++++-----
 vcs-svn/fast_export.h   |   14 +-
 vcs-svn/obj_pool.h      |   61 ---------
 vcs-svn/repo_tree.c     |   36 ++----
 vcs-svn/repo_tree.h     |   12 +--
 vcs-svn/string_pool.c   |  113 -----------------
 vcs-svn/string_pool.h   |   12 --
 vcs-svn/string_pool.txt |   43 -------
 vcs-svn/svndump.c       |  309 ++++++++++++++++++++++++++++-------------------
 vcs-svn/trp.h           |  237 ------------------------------------
 vcs-svn/trp.txt         |  109 -----------------
 18 files changed, 237 insertions(+), 1126 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH 01/11] vcs-svn: use strbuf for revision log
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 02/11] vcs-svn: pass paths through to fast-import David Barr
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This is a small step towards removing dependence on obj_pool.h.

Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
---
 vcs-svn/svndump.c |   26 +++++++-------------------
 1 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 7ecb227..da154ad 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -11,8 +11,8 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 #include "line_buffer.h"
-#include "obj_pool.h"
 #include "string_pool.h"
+#include "strbuf.h"
 
 #define REPORT_FILENO 3
 
@@ -31,22 +31,10 @@
 #define LENGTH_UNKNOWN (~0)
 #define DATE_RFC2822_LEN 31
 
-/* Create memory pool for log messages */
-obj_pool_gen(log, char, 4096)
-
 static struct line_buffer input = LINE_BUFFER_INIT;
 
 #define REPORT_FILENO 3
 
-static char *log_copy(uint32_t length, const char *log)
-{
-	char *buffer;
-	log_free(log_pool.size);
-	buffer = log_pointer(log_alloc(length));
-	strncpy(buffer, log, length);
-	return buffer;
-}
-
 static struct {
 	uint32_t action, propLength, textLength, srcRev, type;
 	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
@@ -56,7 +44,7 @@ static struct {
 static struct {
 	uint32_t revision, author;
 	unsigned long timestamp;
-	char *log;
+	struct strbuf log;
 } rev_ctx;
 
 static struct {
@@ -89,7 +77,7 @@ static void reset_rev_ctx(uint32_t revision)
 {
 	rev_ctx.revision = revision;
 	rev_ctx.timestamp = 0;
-	rev_ctx.log = NULL;
+	strbuf_reset(&rev_ctx.log);
 	rev_ctx.author = ~0;
 }
 
@@ -130,7 +118,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 		if (!val)
 			die("invalid dump: unsets svn:log");
 		/* Value length excludes terminating nul. */
-		rev_ctx.log = log_copy(len + 1, val);
+		strbuf_add(&rev_ctx.log, val, len + 1);
 	} else if (key == keys.svn_author) {
 		rev_ctx.author = pool_intern(val);
 	} else if (key == keys.svn_date) {
@@ -297,7 +285,7 @@ static void begin_revision(void)
 {
 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
 		return;
-	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf,
 		dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
 }
 
@@ -406,6 +394,7 @@ int svndump_init(const char *filename)
 	if (buffer_init(&input, filename))
 		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
+	strbuf_init(&rev_ctx.log, 4096);
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
@@ -415,11 +404,11 @@ int svndump_init(const char *filename)
 
 void svndump_deinit(void)
 {
-	log_reset();
 	fast_export_deinit();
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
+	strbuf_release(&rev_ctx.log);
 	if (buffer_deinit(&input))
 		fprintf(stderr, "Input error\n");
 	if (ferror(stdout))
@@ -428,7 +417,6 @@ void svndump_deinit(void)
 
 void svndump_reset(void)
 {
-	log_reset();
 	fast_export_reset();
 	buffer_reset(&input);
 	reset_dump_ctx(~0);
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 02/11] vcs-svn: pass paths through to fast-import
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
  2011-03-21 23:49   ` [PATCH 01/11] vcs-svn: use strbuf for revision log David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 03/11] vcs-svn: avoid using ls command twice David Barr
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Now that there is no internal representation of the repo,
it is not necessary to tokenise paths.

Use strbuf instead and bypass string_pool.

Also, since we now treat paths in their entirety,
only quote when necessary.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/fast_export.c |   47 ++++++++++++++++++++++++-----------------------
 vcs-svn/fast_export.h |    9 ++++-----
 vcs-svn/repo_tree.c   |   20 ++++++++++----------
 vcs-svn/repo_tree.h   |   13 +++++--------
 vcs-svn/svndump.c     |   37 ++++++++++++++++++++-----------------
 5 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index f19db9a..bb5e9aa 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -4,6 +4,8 @@
  */
 
 #include "git-compat-util.h"
+#include "strbuf.h"
+#include "quote.h"
 #include "fast_export.h"
 #include "line_buffer.h"
 #include "repo_tree.h"
@@ -32,30 +34,30 @@ void fast_export_reset(void)
 	buffer_reset(&report_buffer);
 }
 
-void fast_export_delete(uint32_t depth, const uint32_t *path)
+void fast_export_delete(const char *path)
 {
-	printf("D \"");
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	putchar('D');
+	putchar(' ');
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 }
 
-static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode)
+static void fast_export_truncate(const char *path, uint32_t mode)
 {
-	fast_export_modify(depth, path, mode, "inline");
+	fast_export_modify(path, mode, "inline");
 	printf("data 0\n\n");
 }
 
-void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode,
-			const char *dataref)
+void fast_export_modify(const char *path, uint32_t mode, const char *dataref)
 {
 	/* Mode must be 100644, 100755, 120000, or 160000. */
 	if (!dataref) {
-		fast_export_truncate(depth, path, mode);
+		fast_export_truncate(path, mode);
 		return;
 	}
-	printf("M %06"PRIo32" %s \"", mode, dataref);
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	printf("M %06"PRIo32" %s ", mode, dataref);
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 }
 
 static char gitsvnline[MAX_GITSVN_LINE_LEN];
@@ -93,20 +95,20 @@ void fast_export_end_commit(uint32_t revision)
 	printf("progress Imported commit %"PRIu32".\n\n", revision);
 }
 
-static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path)
+static void ls_from_rev(uint32_t rev, const char *path)
 {
 	/* ls :5 path/to/old/file */
-	printf("ls :%"PRIu32" \"", rev);
-	pool_print_seq_q(depth, path, '/', stdout);
-	printf("\"\n");
+	printf("ls :%"PRIu32" ", rev);
+	quote_c_style(path, NULL, stdout, 0);
+	putchar('\n');
 	fflush(stdout);
 }
 
-static void ls_from_active_commit(uint32_t depth, const uint32_t *path)
+static void ls_from_active_commit(const char *path)
 {
 	/* ls "path/to/file" */
 	printf("ls \"");
-	pool_print_seq_q(depth, path, '/', stdout);
+	quote_c_style(path, NULL, stdout, 1);
 	printf("\"\n");
 	fflush(stdout);
 }
@@ -174,16 +176,15 @@ static int parse_ls_response(const char *response, uint32_t *mode,
 	return 0;
 }
 
-int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+int fast_export_ls_rev(uint32_t rev, const char *path,
 				uint32_t *mode, struct strbuf *dataref)
 {
-	ls_from_rev(rev, depth, path);
+	ls_from_rev(rev, path);
 	return parse_ls_response(get_response_line(), mode, dataref);
 }
 
-int fast_export_ls(uint32_t depth, const uint32_t *path,
-				uint32_t *mode, struct strbuf *dataref)
+int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
 {
-	ls_from_active_commit(depth, path);
+	ls_from_active_commit(path);
 	return parse_ls_response(get_response_line(), mode, dataref);
 }
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index 633d219..a47c609 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -8,18 +8,17 @@ void fast_export_init(int fd);
 void fast_export_deinit(void);
 void fast_export_reset(void);
 
-void fast_export_delete(uint32_t depth, const uint32_t *path);
-void fast_export_modify(uint32_t depth, const uint32_t *path,
-			uint32_t mode, const char *dataref);
+void fast_export_delete(const char *path);
+void fast_export_modify(const char *path, uint32_t mode, const char *dataref);
 void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
 			uint32_t uuid, uint32_t url, unsigned long timestamp);
 void fast_export_end_commit(uint32_t revision);
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
 
 /* If there is no such file at that rev, returns -1, errno == ENOENT. */
-int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+int fast_export_ls_rev(uint32_t rev, const char *path,
 			uint32_t *mode_out, struct strbuf *dataref_out);
-int fast_export_ls(uint32_t depth, const uint32_t *path,
+int fast_export_ls(const char *path,
 			uint32_t *mode_out, struct strbuf *dataref_out);
 
 #endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
index e75f580..f2466bc 100644
--- a/vcs-svn/repo_tree.c
+++ b/vcs-svn/repo_tree.c
@@ -8,14 +8,14 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 
-const char *repo_read_path(const uint32_t *path)
+const char *repo_read_path(const char *path)
 {
 	int err;
 	uint32_t dummy;
 	static struct strbuf buf = STRBUF_INIT;
 
 	strbuf_reset(&buf);
-	err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &dummy, &buf);
+	err = fast_export_ls(path, &dummy, &buf);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
@@ -24,14 +24,14 @@ const char *repo_read_path(const uint32_t *path)
 	return buf.buf;
 }
 
-uint32_t repo_read_mode(const uint32_t *path)
+uint32_t repo_read_mode(const char *path)
 {
 	int err;
 	uint32_t result;
 	static struct strbuf dummy = STRBUF_INIT;
 
 	strbuf_reset(&dummy);
-	err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &result, &dummy);
+	err = fast_export_ls(path, &result, &dummy);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
@@ -41,24 +41,24 @@ uint32_t repo_read_mode(const uint32_t *path)
 	return result;
 }
 
-void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst)
+void repo_copy(uint32_t revision, const char *src, const char *dst)
 {
 	int err;
 	uint32_t mode;
 	static struct strbuf data = STRBUF_INIT;
 
 	strbuf_reset(&data);
-	err = fast_export_ls_rev(revision, REPO_MAX_PATH_DEPTH, src, &mode, &data);
+	err = fast_export_ls_rev(revision, src, &mode, &data);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls_rev error");
-		fast_export_delete(REPO_MAX_PATH_DEPTH, dst);
+		fast_export_delete(dst);
 		return;
 	}
-	fast_export_modify(REPO_MAX_PATH_DEPTH, dst, mode, data.buf);
+	fast_export_modify(dst, mode, data.buf);
 }
 
-void repo_delete(uint32_t *path)
+void repo_delete(const char *path)
 {
-	fast_export_delete(REPO_MAX_PATH_DEPTH, path);
+	fast_export_delete(path);
 }
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index d690784..af2415c 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -8,15 +8,12 @@
 #define REPO_MODE_EXE 0100755
 #define REPO_MODE_LNK 0120000
 
-#define REPO_MAX_PATH_LEN 4096
-#define REPO_MAX_PATH_DEPTH 1000
-
 uint32_t next_blob_mark(void);
-void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst);
-void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
-const char *repo_read_path(const uint32_t *path);
-uint32_t repo_read_mode(const uint32_t *path);
-void repo_delete(uint32_t *path);
+void repo_copy(uint32_t revision, const char *src, const char *dst);
+void repo_add(const char *path, uint32_t mode, uint32_t blob_mark);
+const char *repo_read_path(const char *path);
+uint32_t repo_read_mode(const char *path);
+void repo_delete(const char *path);
 void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
 		 uint32_t url, long unsigned timestamp);
 void repo_diff(uint32_t r1, uint32_t r2);
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index da154ad..afdfc63 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -37,7 +37,7 @@ static struct line_buffer input = LINE_BUFFER_INIT;
 
 static struct {
 	uint32_t action, propLength, textLength, srcRev, type;
-	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+	struct strbuf src, dst;
 	uint32_t text_delta, prop_delta;
 } node_ctx;
 
@@ -66,9 +66,11 @@ static void reset_node_ctx(char *fname)
 	node_ctx.action = NODEACT_UNKNOWN;
 	node_ctx.propLength = LENGTH_UNKNOWN;
 	node_ctx.textLength = LENGTH_UNKNOWN;
-	node_ctx.src[0] = ~0;
+	strbuf_reset(&node_ctx.src);
 	node_ctx.srcRev = 0;
-	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+	strbuf_reset(&node_ctx.dst);
+	if (fname)
+		strbuf_addstr(&node_ctx.dst, fname);
 	node_ctx.text_delta = 0;
 	node_ctx.prop_delta = 0;
 }
@@ -211,14 +213,14 @@ static void handle_node(void)
 		if (have_text || have_props || node_ctx.srcRev)
 			die("invalid dump: deletion node has "
 				"copyfrom info, text, or properties");
-		return repo_delete(node_ctx.dst);
+		return repo_delete(node_ctx.dst.buf);
 	}
 	if (node_ctx.action == NODEACT_REPLACE) {
-		repo_delete(node_ctx.dst);
+		repo_delete(node_ctx.dst.buf);
 		node_ctx.action = NODEACT_ADD;
 	}
 	if (node_ctx.srcRev) {
-		repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+		repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
 		if (node_ctx.action == NODEACT_ADD)
 			node_ctx.action = NODEACT_CHANGE;
 	}
@@ -228,14 +230,14 @@ static void handle_node(void)
 	/*
 	 * Find old content (old_data) and decide on the new mode.
 	 */
-	if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) {
+	if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
 		if (type != REPO_MODE_DIR)
 			die("invalid dump: root of tree is not a regular file");
 		old_data = NULL;
 	} else if (node_ctx.action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node_ctx.dst);
-		mode = repo_read_mode(node_ctx.dst);
+		old_data = repo_read_path(node_ctx.dst.buf);
+		mode = repo_read_mode(node_ctx.dst.buf);
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 			die("invalid dump: cannot modify a directory into a file");
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
@@ -272,12 +274,10 @@ static void handle_node(void)
 		/* For the fast_export_* functions, NULL means empty. */
 		old_data = NULL;
 	if (!have_text) {
-		fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
-					node_ctx.type, old_data);
+		fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
 		return;
 	}
-	fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
-				node_ctx.type, "inline");
+	fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
 	fast_export_data(node_ctx.type, node_ctx.textLength, &input);
 }
 
@@ -356,7 +356,8 @@ void svndump_read(const char *url)
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
 		} else if (key == keys.node_copyfrom_path) {
-			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+			strbuf_reset(&node_ctx.src);
+			strbuf_addstr(&node_ctx.src, val);
 		} else if (key == keys.node_copyfrom_rev) {
 			node_ctx.srcRev = atoi(val);
 		} else if (key == keys.text_content_length) {
@@ -395,6 +396,8 @@ int svndump_init(const char *filename)
 		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
 	strbuf_init(&rev_ctx.log, 4096);
+	strbuf_init(&node_ctx.src, 4096);
+	strbuf_init(&node_ctx.dst, 4096);
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
@@ -409,6 +412,8 @@ void svndump_deinit(void)
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
 	strbuf_release(&rev_ctx.log);
+	strbuf_release(&node_ctx.src);
+	strbuf_release(&node_ctx.dst);
 	if (buffer_deinit(&input))
 		fprintf(stderr, "Input error\n");
 	if (ferror(stdout))
@@ -419,7 +424,5 @@ void svndump_reset(void)
 {
 	fast_export_reset();
 	buffer_reset(&input);
-	reset_dump_ctx(~0);
-	reset_rev_ctx(0);
-	reset_node_ctx(NULL);
+	pool_reset();
 }
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 03/11] vcs-svn: avoid using ls command twice
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
  2011-03-21 23:49   ` [PATCH 01/11] vcs-svn: use strbuf for revision log David Barr
  2011-03-21 23:49   ` [PATCH 02/11] vcs-svn: pass paths through to fast-import David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 04/11] vcs-svn: implement perfect hash for node-prop keys David Barr
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/repo_tree.c |   24 ++++--------------------
 vcs-svn/repo_tree.h |    3 +--
 vcs-svn/svndump.c   |    3 +--
 3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
index f2466bc..67d27f0 100644
--- a/vcs-svn/repo_tree.c
+++ b/vcs-svn/repo_tree.c
@@ -8,39 +8,23 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 
-const char *repo_read_path(const char *path)
+const char *repo_read_path(const char *path, uint32_t *mode_out)
 {
 	int err;
-	uint32_t dummy;
 	static struct strbuf buf = STRBUF_INIT;
 
 	strbuf_reset(&buf);
-	err = fast_export_ls(path, &dummy, &buf);
+	err = fast_export_ls(path, mode_out, &buf);
 	if (err) {
 		if (errno != ENOENT)
 			die_errno("BUG: unexpected fast_export_ls error");
+		/* Treat missing paths as directories. */
+		*mode_out = REPO_MODE_DIR;
 		return NULL;
 	}
 	return buf.buf;
 }
 
-uint32_t repo_read_mode(const char *path)
-{
-	int err;
-	uint32_t result;
-	static struct strbuf dummy = STRBUF_INIT;
-
-	strbuf_reset(&dummy);
-	err = fast_export_ls(path, &result, &dummy);
-	if (err) {
-		if (errno != ENOENT)
-			die_errno("BUG: unexpected fast_export_ls error");
-		/* Treat missing paths as directories. */
-		return REPO_MODE_DIR;
-	}
-	return result;
-}
-
 void repo_copy(uint32_t revision, const char *src, const char *dst)
 {
 	int err;
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index af2415c..eb003e6 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -11,8 +11,7 @@
 uint32_t next_blob_mark(void);
 void repo_copy(uint32_t revision, const char *src, const char *dst);
 void repo_add(const char *path, uint32_t mode, uint32_t blob_mark);
-const char *repo_read_path(const char *path);
-uint32_t repo_read_mode(const char *path);
+const char *repo_read_path(const char *path, uint32_t *mode_out);
 void repo_delete(const char *path);
 void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
 		 uint32_t url, long unsigned timestamp);
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index afdfc63..15b173e 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -236,8 +236,7 @@ static void handle_node(void)
 		old_data = NULL;
 	} else if (node_ctx.action == NODEACT_CHANGE) {
 		uint32_t mode;
-		old_data = repo_read_path(node_ctx.dst.buf);
-		mode = repo_read_mode(node_ctx.dst.buf);
+		old_data = repo_read_path(node_ctx.dst.buf, &mode);
 		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 			die("invalid dump: cannot modify a directory into a file");
 		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 04/11] vcs-svn: implement perfect hash for node-prop keys
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (2 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 03/11] vcs-svn: avoid using ls command twice David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 05/11] vcs-svn: implement perfect hash for top-level keys David Barr
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |   57 +++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 15b173e..46ae5fa 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -52,8 +52,7 @@ static struct {
 } dump_ctx;
 
 static struct {
-	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
-		revision_number, node_path, node_kind, node_action,
+	uint32_t uuid, revision_number, node_path, node_kind, node_action,
 		node_copyfrom_path, node_copyfrom_rev, text_content_length,
 		prop_content_length, content_length, svn_fs_dump_format_version,
 		/* version 3 format */
@@ -92,11 +91,6 @@ static void reset_dump_ctx(uint32_t url)
 
 static void init_keys(void)
 {
-	keys.svn_log = pool_intern("svn:log");
-	keys.svn_author = pool_intern("svn:author");
-	keys.svn_date = pool_intern("svn:date");
-	keys.svn_executable = pool_intern("svn:executable");
-	keys.svn_special = pool_intern("svn:special");
 	keys.uuid = pool_intern("UUID");
 	keys.revision_number = pool_intern("Revision-number");
 	keys.node_path = pool_intern("Node-path");
@@ -113,22 +107,44 @@ static void init_keys(void)
 	keys.prop_delta = pool_intern("Prop-delta");
 }
 
-static void handle_property(uint32_t key, const char *val, uint32_t len,
+/* Compare string to literal of equal length; must be guarded by length test. */
+#define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
+
+static void handle_property(struct strbuf *key_buf, const char *val, uint32_t len,
 				uint32_t *type_set)
 {
-	if (key == keys.svn_log) {
+	const int sizeof_key = key_buf->len + 1;
+	const char *key = key_buf->buf;
+	switch (sizeof_key) {
+	case sizeof("svn:log"):
+		if (constcmp(key, "svn:log"))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:log");
 		/* Value length excludes terminating nul. */
 		strbuf_add(&rev_ctx.log, val, len + 1);
-	} else if (key == keys.svn_author) {
+		break;
+	case sizeof("svn:author"):
+		if (constcmp(key, "svn:author"))
+			break;
 		rev_ctx.author = pool_intern(val);
-	} else if (key == keys.svn_date) {
+		break;
+	case sizeof("svn:date"):
+		if (constcmp(key, "svn:date"))
+			break;
 		if (!val)
 			die("invalid dump: unsets svn:date");
 		if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
 			warning("invalid timestamp: %s", val);
-	} else if (key == keys.svn_executable || key == keys.svn_special) {
+		break;
+	case sizeof("svn:executable"):
+	case sizeof("svn:special"):
+		if (sizeof_key == sizeof("svn:executable") &&
+		    constcmp(key, "svn:executable"))
+			break;
+		if (sizeof_key == sizeof("svn:special") &&
+		    constcmp(key, "svn:special"))
+			break;
 		if (*type_set) {
 			if (!val)
 				return;
@@ -139,7 +155,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 			return;
 		}
 		*type_set = 1;
-		node_ctx.type = key == keys.svn_executable ?
+		node_ctx.type = sizeof_key == sizeof("svn:executable") ?
 				REPO_MODE_EXE :
 				REPO_MODE_LNK;
 	}
@@ -147,7 +163,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len,
 
 static void read_props(void)
 {
-	uint32_t key = ~0;
+	static struct strbuf key = STRBUF_INIT;
 	const char *t;
 	/*
 	 * NEEDSWORK: to support simple mode changes like
@@ -175,16 +191,19 @@ static void read_props(void)
 
 		switch (type) {
 		case 'K':
-			key = pool_intern(val);
-			continue;
 		case 'D':
-			key = pool_intern(val);
+			strbuf_reset(&key);
+			if (val)
+				strbuf_add(&key, val, len);
+			if (type == 'K')
+				continue;
+			assert(type == 'D');
 			val = NULL;
 			len = 0;
 			/* fall through */
 		case 'V':
-			handle_property(key, val, len, &type_set);
-			key = ~0;
+			handle_property(&key, val, len, &type_set);
+			strbuf_reset(&key);
 			continue;
 		default:
 			die("invalid property line: %s\n", t);
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 05/11] vcs-svn: implement perfect hash for top-level keys
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (3 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 04/11] vcs-svn: implement perfect hash for node-prop keys David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 06/11] vcs-svn: use switch rather than cascading ifs David Barr
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This eliminates one more dependency on string_pool.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |  110 +++++++++++++++++++++++++++++------------------------
 1 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 46ae5fa..3ad48e5 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -51,14 +51,6 @@ static struct {
 	uint32_t version, uuid, url;
 } dump_ctx;
 
-static struct {
-	uint32_t uuid, revision_number, node_path, node_kind, node_action,
-		node_copyfrom_path, node_copyfrom_rev, text_content_length,
-		prop_content_length, content_length, svn_fs_dump_format_version,
-		/* version 3 format */
-		text_delta, prop_delta;
-} keys;
-
 static void reset_node_ctx(char *fname)
 {
 	node_ctx.type = 0;
@@ -89,24 +81,6 @@ static void reset_dump_ctx(uint32_t url)
 	dump_ctx.uuid = ~0;
 }
 
-static void init_keys(void)
-{
-	keys.uuid = pool_intern("UUID");
-	keys.revision_number = pool_intern("Revision-number");
-	keys.node_path = pool_intern("Node-path");
-	keys.node_kind = pool_intern("Node-kind");
-	keys.node_action = pool_intern("Node-action");
-	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
-	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
-	keys.text_content_length = pool_intern("Text-content-length");
-	keys.prop_content_length = pool_intern("Prop-content-length");
-	keys.content_length = pool_intern("Content-length");
-	keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version");
-	/* version 3 format (Subversion 1.1.0) */
-	keys.text_delta = pool_intern("Text-delta");
-	keys.prop_delta = pool_intern("Prop-delta");
-}
-
 /* Compare string to literal of equal length; must be guarded by length test. */
 #define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
 
@@ -319,7 +293,6 @@ void svndump_read(const char *url)
 	char *t;
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
-	uint32_t key;
 
 	reset_dump_ctx(pool_intern(url));
 	while ((t = buffer_read_line(&input))) {
@@ -328,16 +301,25 @@ void svndump_read(const char *url)
 			continue;
 		*val++ = '\0';
 		*val++ = '\0';
-		key = pool_intern(t);
 
-		if (key == keys.svn_fs_dump_format_version) {
+		/* strlen(key) + 1 */
+		switch (val - t - 1) {
+		case sizeof("SVN-fs-dump-format-version"):
+			if (constcmp(t, "SVN-fs-dump-format-version"))
+				continue;
 			dump_ctx.version = atoi(val);
 			if (dump_ctx.version > 3)
 				die("expected svn dump format version <= 3, found %"PRIu32,
 				    dump_ctx.version);
-		} else if (key == keys.uuid) {
+			break;
+		case sizeof("UUID"):
+			if (constcmp(t, "UUID"))
+				continue;
 			dump_ctx.uuid = pool_intern(val);
-		} else if (key == keys.revision_number) {
+			break;
+		case sizeof("Revision-number"):
+			if (constcmp(t, "Revision-number"))
+				continue;
 			if (active_ctx == NODE_CTX)
 				handle_node();
 			if (active_ctx == REV_CTX)
@@ -346,21 +328,31 @@ void svndump_read(const char *url)
 				end_revision();
 			active_ctx = REV_CTX;
 			reset_rev_ctx(atoi(val));
-		} else if (key == keys.node_path) {
-			if (active_ctx == NODE_CTX)
-				handle_node();
-			if (active_ctx == REV_CTX)
-				begin_revision();
-			active_ctx = NODE_CTX;
-			reset_node_ctx(val);
-		} else if (key == keys.node_kind) {
+			break;
+		case sizeof("Node-path"):
+			if (prefixcmp(t, "Node-"))
+				continue;
+			if (!constcmp(t + strlen("Node-"), "path")) {
+				if (active_ctx == NODE_CTX)
+					handle_node();
+				if (active_ctx == REV_CTX)
+					begin_revision();
+				active_ctx = NODE_CTX;
+				reset_node_ctx(val);
+				break;
+			}
+			if (constcmp(t + strlen("Node-"), "kind"))
+				continue;
 			if (!strcmp(val, "dir"))
 				node_ctx.type = REPO_MODE_DIR;
 			else if (!strcmp(val, "file"))
 				node_ctx.type = REPO_MODE_BLB;
 			else
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
-		} else if (key == keys.node_action) {
+			break;
+		case sizeof("Node-action"):
+			if (constcmp(t, "Node-action"))
+				continue;
 			if (!strcmp(val, "delete")) {
 				node_ctx.action = NODEACT_DELETE;
 			} else if (!strcmp(val, "add")) {
@@ -373,20 +365,39 @@ void svndump_read(const char *url)
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-		} else if (key == keys.node_copyfrom_path) {
+			break;
+		case sizeof("Node-copyfrom-path"):
+			if (constcmp(t, "Node-copyfrom-path"))
+				continue;
 			strbuf_reset(&node_ctx.src);
 			strbuf_addstr(&node_ctx.src, val);
-		} else if (key == keys.node_copyfrom_rev) {
+			break;
+		case sizeof("Node-copyfrom-rev"):
+			if (constcmp(t, "Node-copyfrom-rev"))
+				continue;
 			node_ctx.srcRev = atoi(val);
-		} else if (key == keys.text_content_length) {
-			node_ctx.textLength = atoi(val);
-		} else if (key == keys.prop_content_length) {
+			break;
+		case sizeof("Text-content-length"):
+			if (!constcmp(t, "Text-content-length")) {
+				node_ctx.textLength = atoi(val);
+				break;
+			}
+			if (constcmp(t, "Prop-content-length"))
+				continue;
 			node_ctx.propLength = atoi(val);
-		} else if (key == keys.text_delta) {
-			node_ctx.text_delta = !strcmp(val, "true");
-		} else if (key == keys.prop_delta) {
+			break;
+		case sizeof("Text-delta"):
+			if (!constcmp(t, "Text-delta")) {
+				node_ctx.text_delta = !strcmp(val, "true");
+				break;
+			}
+			if (constcmp(t, "Prop-delta"))
+				continue;
 			node_ctx.prop_delta = !strcmp(val, "true");
-		} else if (key == keys.content_length) {
+			break;
+		case sizeof("Content-length"):
+			if (constcmp(t, "Content-length"))
+				continue;
 			len = atoi(val);
 			buffer_read_line(&input);
 			if (active_ctx == REV_CTX) {
@@ -419,7 +430,6 @@ int svndump_init(const char *filename)
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
-	init_keys();
 	return 0;
 }
 
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 06/11] vcs-svn: use switch rather than cascading ifs
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (4 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 05/11] vcs-svn: implement perfect hash for top-level keys David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 07/11] vcs-svn: factor out usage of string_pool David Barr
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

In the spirit of the last two changes:
Switch on length and use constcmp for parsing headers with restricted values.

Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
---
 vcs-svn/svndump.c |   40 ++++++++++++++++++++++++++++++----------
 1 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 3ad48e5..7b5b5ec 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -343,25 +343,45 @@ void svndump_read(const char *url)
 			}
 			if (constcmp(t + strlen("Node-"), "kind"))
 				continue;
-			if (!strcmp(val, "dir"))
+			switch (strlen(val) + 1) {
+			case sizeof("dir"):
+				if (constcmp(val, "dir"))
+					break;
 				node_ctx.type = REPO_MODE_DIR;
-			else if (!strcmp(val, "file"))
+				break;
+			case sizeof("file"):
+				if (constcmp(val, "file"))
+					break;
 				node_ctx.type = REPO_MODE_BLB;
-			else
+				break;
+			default:
 				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
 			break;
 		case sizeof("Node-action"):
 			if (constcmp(t, "Node-action"))
 				continue;
-			if (!strcmp(val, "delete")) {
-				node_ctx.action = NODEACT_DELETE;
-			} else if (!strcmp(val, "add")) {
+			switch (strlen(val) + 1) {
+			case sizeof("add"):
+				if (constcmp(val, "add"))
+					break;
 				node_ctx.action = NODEACT_ADD;
-			} else if (!strcmp(val, "change")) {
-				node_ctx.action = NODEACT_CHANGE;
-			} else if (!strcmp(val, "replace")) {
+				break;
+			case sizeof("change"):
+				if (!constcmp(val, "change")) {
+					node_ctx.action = NODEACT_CHANGE;
+					break;
+				}
+				if (constcmp(val, "delete"))
+					break;
+				node_ctx.action = NODEACT_DELETE;
+				break;
+			case sizeof("replace"):
+				if (constcmp(val, "replace"))
+					break;
 				node_ctx.action = NODEACT_REPLACE;
-			} else {
+				break;
+			default:
 				fprintf(stderr, "Unknown node-action: %s\n", val);
 				node_ctx.action = NODEACT_UNKNOWN;
 			}
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 07/11] vcs-svn: factor out usage of string_pool
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (5 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 06/11] vcs-svn: use switch rather than cascading ifs David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 08/11] vcs-svn: drop string_pool David Barr
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

That is, use strbufs and strings instead of interned
strings for values of rev, dump, and node fields that
happen to be strings.  After this change, there are
no more users of the string-pool library left.

There is a small functional change inlined: test for
empty rather than NULL when falling back to defaults
for commit metadata.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/fast_export.c |   17 +++++++----------
 vcs-svn/fast_export.h |    5 +++--
 vcs-svn/svndump.c     |   46 ++++++++++++++++++++++++++++++----------------
 3 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index bb5e9aa..1d50512 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -9,7 +9,6 @@
 #include "fast_export.h"
 #include "line_buffer.h"
 #include "repo_tree.h"
-#include "string_pool.h"
 #include "strbuf.h"
 
 #define MAX_GITSVN_LINE_LEN 4096
@@ -61,25 +60,23 @@ void fast_export_modify(const char *path, uint32_t mode, const char *dataref)
 }
 
 static char gitsvnline[MAX_GITSVN_LINE_LEN];
-void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
-			uint32_t uuid, uint32_t url,
+void fast_export_begin_commit(uint32_t revision, const char *author,
+			const char *log, const char *uuid, const char *url,
 			unsigned long timestamp)
 {
-	if (!log)
-		log = "";
-	if (~uuid && ~url) {
+	if (*uuid && *url) {
 		snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
 				"\n\ngit-svn-id: %s@%"PRIu32" %s\n",
-				 pool_fetch(url), revision, pool_fetch(uuid));
+				 url, revision, uuid);
 	} else {
 		*gitsvnline = '\0';
 	}
 	printf("commit refs/heads/master\n");
 	printf("mark :%"PRIu32"\n", revision);
 	printf("committer %s <%s@%s> %ld +0000\n",
-		   ~author ? pool_fetch(author) : "nobody",
-		   ~author ? pool_fetch(author) : "nobody",
-		   ~uuid ? pool_fetch(uuid) : "local", timestamp);
+		   *author ? author : "nobody",
+		   *author ? author : "nobody",
+		   *uuid ? uuid : "local", timestamp);
 	printf("data %"PRIu32"\n%s%s\n",
 		   (uint32_t) (strlen(log) + strlen(gitsvnline)),
 		   log, gitsvnline);
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index a47c609..bc5bddf 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -10,8 +10,9 @@ void fast_export_reset(void);
 
 void fast_export_delete(const char *path);
 void fast_export_modify(const char *path, uint32_t mode, const char *dataref);
-void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
-			uint32_t uuid, uint32_t url, unsigned long timestamp);
+void fast_export_begin_commit(uint32_t revision, const char *author,
+			const char *log, const char *uuid, const char *url,
+			unsigned long timestamp);
 void fast_export_end_commit(uint32_t revision);
 void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
 
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 7b5b5ec..897349e 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -11,7 +11,6 @@
 #include "repo_tree.h"
 #include "fast_export.h"
 #include "line_buffer.h"
-#include "string_pool.h"
 #include "strbuf.h"
 
 #define REPORT_FILENO 3
@@ -42,13 +41,14 @@ static struct {
 } node_ctx;
 
 static struct {
-	uint32_t revision, author;
+	uint32_t revision;
 	unsigned long timestamp;
-	struct strbuf log;
+	struct strbuf log, author;
 } rev_ctx;
 
 static struct {
-	uint32_t version, uuid, url;
+	uint32_t version;
+	struct strbuf uuid, url;
 } dump_ctx;
 
 static void reset_node_ctx(char *fname)
@@ -71,14 +71,16 @@ static void reset_rev_ctx(uint32_t revision)
 	rev_ctx.revision = revision;
 	rev_ctx.timestamp = 0;
 	strbuf_reset(&rev_ctx.log);
-	rev_ctx.author = ~0;
+	strbuf_reset(&rev_ctx.author);
 }
 
-static void reset_dump_ctx(uint32_t url)
+static void reset_dump_ctx(const char *url)
 {
-	dump_ctx.url = url;
+	strbuf_reset(&dump_ctx.url);
+	if (url)
+		strbuf_addstr(&dump_ctx.url, url);
 	dump_ctx.version = 1;
-	dump_ctx.uuid = ~0;
+	strbuf_reset(&dump_ctx.uuid);
 }
 
 /* Compare string to literal of equal length; must be guarded by length test. */
@@ -101,7 +103,9 @@ static void handle_property(struct strbuf *key_buf, const char *val, uint32_t le
 	case sizeof("svn:author"):
 		if (constcmp(key, "svn:author"))
 			break;
-		rev_ctx.author = pool_intern(val);
+		strbuf_reset(&rev_ctx.author);
+		if (val)
+			strbuf_add(&rev_ctx.author, val, len);
 		break;
 	case sizeof("svn:date"):
 		if (constcmp(key, "svn:date"))
@@ -277,8 +281,9 @@ static void begin_revision(void)
 {
 	if (!rev_ctx.revision)	/* revision 0 gets no git commit. */
 		return;
-	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf,
-		dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+	fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
+		rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
+		rev_ctx.timestamp);
 }
 
 static void end_revision(void)
@@ -294,7 +299,7 @@ void svndump_read(const char *url)
 	uint32_t active_ctx = DUMP_CTX;
 	uint32_t len;
 
-	reset_dump_ctx(pool_intern(url));
+	reset_dump_ctx(url);
 	while ((t = buffer_read_line(&input))) {
 		val = strstr(t, ": ");
 		if (!val)
@@ -315,7 +320,8 @@ void svndump_read(const char *url)
 		case sizeof("UUID"):
 			if (constcmp(t, "UUID"))
 				continue;
-			dump_ctx.uuid = pool_intern(val);
+			strbuf_reset(&dump_ctx.uuid);
+			strbuf_addstr(&dump_ctx.uuid, val);
 			break;
 		case sizeof("Revision-number"):
 			if (constcmp(t, "Revision-number"))
@@ -444,10 +450,13 @@ int svndump_init(const char *filename)
 	if (buffer_init(&input, filename))
 		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
+	strbuf_init(&dump_ctx.uuid, 4096);
+	strbuf_init(&dump_ctx.url, 4096);
 	strbuf_init(&rev_ctx.log, 4096);
+	strbuf_init(&rev_ctx.author, 4096);
 	strbuf_init(&node_ctx.src, 4096);
 	strbuf_init(&node_ctx.dst, 4096);
-	reset_dump_ctx(~0);
+	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
 	return 0;
@@ -456,7 +465,7 @@ int svndump_init(const char *filename)
 void svndump_deinit(void)
 {
 	fast_export_deinit();
-	reset_dump_ctx(~0);
+	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
 	strbuf_release(&rev_ctx.log);
@@ -472,5 +481,10 @@ void svndump_reset(void)
 {
 	fast_export_reset();
 	buffer_reset(&input);
-	pool_reset();
+	strbuf_release(&dump_ctx.uuid);
+	strbuf_release(&dump_ctx.url);
+	strbuf_release(&rev_ctx.log);
+	strbuf_release(&rev_ctx.author);
+	strbuf_release(&node_ctx.src);
+	strbuf_release(&node_ctx.dst);
 }
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 08/11] vcs-svn: drop string_pool
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (6 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 07/11] vcs-svn: factor out usage of string_pool David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 09/11] vcs-svn: drop trp.h David Barr
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore              |    1 -
 Makefile                |   12 ++---
 t/t0080-vcs-svn.sh      |   16 -------
 test-string-pool.c      |   31 -------------
 vcs-svn/string_pool.c   |  113 -----------------------------------------------
 vcs-svn/string_pool.h   |   12 -----
 vcs-svn/string_pool.txt |   43 ------------------
 7 files changed, 4 insertions(+), 224 deletions(-)
 delete mode 100644 test-string-pool.c
 delete mode 100644 vcs-svn/string_pool.c
 delete mode 100644 vcs-svn/string_pool.h
 delete mode 100644 vcs-svn/string_pool.txt

diff --git a/.gitignore b/.gitignore
index c460c66..215e842 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,7 +177,6 @@
 /test-run-command
 /test-sha1
 /test-sigchain
-/test-string-pool
 /test-subprocess
 /test-svn-fe
 /test-treap
diff --git a/Makefile b/Makefile
index ade7923..f8182e5 100644
--- a/Makefile
+++ b/Makefile
@@ -430,7 +430,6 @@ TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
-TEST_PROGRAMS_NEED_X += test-string-pool
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
 TEST_PROGRAMS_NEED_X += test-treap
@@ -1838,10 +1837,9 @@ ifndef NO_CURL
 endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
-VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
-	vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
-VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \
-	test-line-buffer.o test-treap.o
+VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
+	vcs-svn/fast_export.o vcs-svn/svndump.o
+VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o test-treap.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1965,7 +1963,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
+	vcs-svn/obj_pool.h vcs-svn/trp.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
@@ -2133,8 +2131,6 @@ test-line-buffer$X: vcs-svn/lib.a
 
 test-parse-options$X: parse-options.o
 
-test-string-pool$X: vcs-svn/lib.a
-
 test-svn-fe$X: vcs-svn/lib.a
 
 .PRECIOUS: $(TEST_OBJS)
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
index 99a314b..ce02c58 100755
--- a/t/t0080-vcs-svn.sh
+++ b/t/t0080-vcs-svn.sh
@@ -76,22 +76,6 @@ test_expect_success 'obj pool: high-water mark' '
 	test_cmp expected actual
 '
 
-test_expect_success 'string pool' '
-	echo a does not equal b >expected.differ &&
-	echo a equals a >expected.match &&
-	echo equals equals equals >expected.matchmore &&
-
-	test-string-pool "a,--b" >actual.differ &&
-	test-string-pool "a,a" >actual.match &&
-	test-string-pool "equals-equals" >actual.matchmore &&
-	test_must_fail test-string-pool a,a,a &&
-	test_must_fail test-string-pool a &&
-
-	test_cmp expected.differ actual.differ &&
-	test_cmp expected.match actual.match &&
-	test_cmp expected.matchmore actual.matchmore
-'
-
 test_expect_success 'treap sort' '
 	cat <<-\EOF >unsorted &&
 	68
diff --git a/test-string-pool.c b/test-string-pool.c
deleted file mode 100644
index c5782e6..0000000
--- a/test-string-pool.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * test-string-pool.c: code to exercise the svn importer's string pool
- */
-
-#include "git-compat-util.h"
-#include "vcs-svn/string_pool.h"
-
-int main(int argc, char *argv[])
-{
-	const uint32_t unequal = pool_intern("does not equal");
-	const uint32_t equal = pool_intern("equals");
-	uint32_t buf[3];
-	uint32_t n;
-
-	if (argc != 2)
-		usage("test-string-pool <string>,<string>");
-
-	n = pool_tok_seq(3, buf, ",-", argv[1]);
-	if (n >= 3)
-		die("too many strings");
-	if (n <= 1)
-		die("too few strings");
-
-	buf[2] = buf[1];
-	buf[1] = (buf[0] == buf[2]) ? equal : unequal;
-	pool_print_seq(3, buf, ' ', stdout);
-	fputc('\n', stdout);
-
-	pool_reset();
-	return 0;
-}
diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
deleted file mode 100644
index be43598..0000000
--- a/vcs-svn/string_pool.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#include "git-compat-util.h"
-#include "quote.h"
-#include "trp.h"
-#include "obj_pool.h"
-#include "string_pool.h"
-
-static struct trp_root tree = { ~0 };
-
-struct node {
-	uint32_t offset;
-	struct trp_node children;
-};
-
-/* Two memory pools: one for struct node, and another for strings */
-obj_pool_gen(node, struct node, 4096)
-obj_pool_gen(string, char, 4096)
-
-static char *node_value(struct node *node)
-{
-	return node ? string_pointer(node->offset) : NULL;
-}
-
-static int node_cmp(struct node *a, struct node *b)
-{
-	return strcmp(node_value(a), node_value(b));
-}
-
-/* Build a Treap from the node structure (a trp_node w/ offset) */
-trp_gen(static, tree_, struct node, children, node, node_cmp);
-
-const char *pool_fetch(uint32_t entry)
-{
-	return node_value(node_pointer(entry));
-}
-
-uint32_t pool_intern(const char *key)
-{
-	/* Canonicalize key */
-	struct node *match = NULL, *node;
-	uint32_t key_len;
-	if (key == NULL)
-		return ~0;
-	key_len = strlen(key) + 1;
-	node = node_pointer(node_alloc(1));
-	node->offset = string_alloc(key_len);
-	strcpy(node_value(node), key);
-	match = tree_search(&tree, node);
-	if (!match) {
-		tree_insert(&tree, node);
-	} else {
-		node_free(1);
-		string_free(key_len);
-		node = match;
-	}
-	return node_offset(node);
-}
-
-uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
-{
-	char *token = strtok_r(str, delim, saveptr);
-	return token ? pool_intern(token) : ~0;
-}
-
-void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream)
-{
-	uint32_t i;
-	for (i = 0; i < len && ~seq[i]; i++) {
-		fputs(pool_fetch(seq[i]), stream);
-		if (i < len - 1 && ~seq[i + 1])
-			fputc(delim, stream);
-	}
-}
-
-void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream)
-{
-	uint32_t i;
-	for (i = 0; i < len && ~seq[i]; i++) {
-		quote_c_style(pool_fetch(seq[i]), NULL, stream, 1);
-		if (i < len - 1 && ~seq[i + 1])
-			fputc(delim, stream);
-	}
-}
-
-uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
-{
-	char *context = NULL;
-	uint32_t token = ~0;
-	uint32_t length;
-
-	if (sz == 0)
-		return ~0;
-	if (str)
-		token = pool_tok_r(str, delim, &context);
-	for (length = 0; length < sz; length++) {
-		seq[length] = token;
-		if (token == ~0)
-			return length;
-		token = pool_tok_r(NULL, delim, &context);
-	}
-	seq[sz - 1] = ~0;
-	return sz;
-}
-
-void pool_reset(void)
-{
-	node_reset();
-	string_reset();
-}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
deleted file mode 100644
index 96e501d..0000000
--- a/vcs-svn/string_pool.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef STRING_POOL_H_
-#define STRING_POOL_H_
-
-uint32_t pool_intern(const char *key);
-const char *pool_fetch(uint32_t entry);
-uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
-void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream);
-void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream);
-uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
-void pool_reset(void);
-
-#endif
diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt
deleted file mode 100644
index 1b41f15..0000000
--- a/vcs-svn/string_pool.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-string_pool API
-===============
-
-The string_pool API provides facilities for replacing strings
-with integer keys that can be more easily compared and stored.
-The facilities are designed so that one could teach Git without
-too much trouble to store the information needed for these keys to
-remain valid over multiple executions.
-
-Functions
----------
-
-pool_intern::
-	Include a string in the string pool and get its key.
-	If that string is already in the pool, retrieves its
-	existing key.
-
-pool_fetch::
-	Retrieve the string associated to a given key.
-
-pool_tok_r::
-	Extract the key of the next token from a string.
-	Interface mimics strtok_r.
-
-pool_print_seq::
-	Print a sequence of strings named by key to a file, using the
-	specified delimiter to separate them.
-
-	If NULL (key ~0) appears in the sequence, the sequence ends
-	early.
-
-pool_tok_seq::
-	Split a string into tokens, storing the keys of segments
-	into a caller-provided array.
-
-	Unless sz is 0, the array will always be ~0-terminated.
-	If there is not enough room for all the tokens, the
-	array holds as many tokens as fit in the entries before
-	the terminating ~0.  Return value is the index after the
-	last token, or sz if the tokens did not fit.
-
-pool_reset::
-	Deallocate storage for the string pool.
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 09/11] vcs-svn: drop trp.h
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (7 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 08/11] vcs-svn: drop string_pool David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:49   ` [PATCH 10/11] vcs-svn: drop obj_pool.h David Barr
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore         |    1 -
 Makefile           |    5 +-
 t/t0080-vcs-svn.sh |   22 -----
 test-treap.c       |   70 ---------------
 vcs-svn/LICENSE    |    3 -
 vcs-svn/trp.h      |  237 ----------------------------------------------------
 vcs-svn/trp.txt    |  109 ------------------------
 7 files changed, 2 insertions(+), 445 deletions(-)
 delete mode 100644 test-treap.c
 delete mode 100644 vcs-svn/trp.h
 delete mode 100644 vcs-svn/trp.txt

diff --git a/.gitignore b/.gitignore
index 215e842..aa94ff1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,7 +179,6 @@
 /test-sigchain
 /test-subprocess
 /test-svn-fe
-/test-treap
 /common-cmds.h
 *.tar.gz
 *.dsc
diff --git a/Makefile b/Makefile
index f8182e5..a2cadc5 100644
--- a/Makefile
+++ b/Makefile
@@ -432,7 +432,6 @@ TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
-TEST_PROGRAMS_NEED_X += test-treap
 TEST_PROGRAMS_NEED_X += test-index-version
 TEST_PROGRAMS_NEED_X += test-mktemp
 
@@ -1839,7 +1838,7 @@ XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
 VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
 	vcs-svn/fast_export.o vcs-svn/svndump.o
-VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o test-treap.o
+VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1963,7 +1962,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h vcs-svn/trp.h \
+	vcs-svn/obj_pool.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
index ce02c58..3f29496 100755
--- a/t/t0080-vcs-svn.sh
+++ b/t/t0080-vcs-svn.sh
@@ -76,26 +76,4 @@ test_expect_success 'obj pool: high-water mark' '
 	test_cmp expected actual
 '
 
-test_expect_success 'treap sort' '
-	cat <<-\EOF >unsorted &&
-	68
-	12
-	13
-	13
-	68
-	13
-	13
-	21
-	10
-	11
-	12
-	13
-	13
-	EOF
-	sort unsorted >expected &&
-
-	test-treap <unsorted >actual &&
-	test_cmp expected actual
-'
-
 test_done
diff --git a/test-treap.c b/test-treap.c
deleted file mode 100644
index ab8c951..0000000
--- a/test-treap.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * test-treap.c: code to exercise the svn importer's treap structure
- */
-
-#include "cache.h"
-#include "vcs-svn/obj_pool.h"
-#include "vcs-svn/trp.h"
-
-struct int_node {
-	uintmax_t n;
-	struct trp_node children;
-};
-
-obj_pool_gen(node, struct int_node, 3)
-
-static int node_cmp(struct int_node *a, struct int_node *b)
-{
-	return (a->n > b->n) - (a->n < b->n);
-}
-
-trp_gen(static, treap_, struct int_node, children, node, node_cmp)
-
-static void strtonode(struct int_node *item, const char *s)
-{
-	char *end;
-	item->n = strtoumax(s, &end, 10);
-	if (*s == '\0' || (*end != '\n' && *end != '\0'))
-		die("invalid integer: %s", s);
-}
-
-int main(int argc, char *argv[])
-{
-	struct strbuf sb = STRBUF_INIT;
-	struct trp_root root = { ~0 };
-	uint32_t item;
-
-	if (argc != 1)
-		usage("test-treap < ints");
-
-	while (strbuf_getline(&sb, stdin, '\n') != EOF) {
-		struct int_node *node = node_pointer(node_alloc(1));
-
-		item = node_offset(node);
-		strtonode(node, sb.buf);
-		node = treap_insert(&root, node_pointer(item));
-		if (node_offset(node) != item)
-			die("inserted %"PRIu32" in place of %"PRIu32"",
-				node_offset(node), item);
-	}
-
-	item = node_offset(treap_first(&root));
-	while (~item) {
-		uint32_t next;
-		struct int_node *tmp = node_pointer(node_alloc(1));
-
-		tmp->n = node_pointer(item)->n;
-		next = node_offset(treap_next(&root, node_pointer(item)));
-
-		treap_remove(&root, node_pointer(item));
-		item = node_offset(treap_nsearch(&root, tmp));
-
-		if (item != next && (!~item || node_pointer(item)->n != tmp->n))
-			die("found %"PRIuMAX" in place of %"PRIuMAX"",
-				~item ? node_pointer(item)->n : ~(uintmax_t) 0,
-				~next ? node_pointer(next)->n : ~(uintmax_t) 0);
-		printf("%"PRIuMAX"\n", tmp->n);
-	}
-	node_reset();
-	return 0;
-}
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE
index 0a5e3c4..533f585 100644
--- a/vcs-svn/LICENSE
+++ b/vcs-svn/LICENSE
@@ -1,9 +1,6 @@
 Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
 All rights reserved.
 
-Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
-All rights reserved.
-
 Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH,
 Frankfurt/Main, Germany
 and others, see http://svn2cc.sarovar.org
diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
deleted file mode 100644
index c32b918..0000000
--- a/vcs-svn/trp.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * C macro implementation of treaps.
- *
- * Usage:
- *   #include <stdint.h>
- *   #include "trp.h"
- *   trp_gen(...)
- *
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#ifndef TRP_H_
-#define TRP_H_
-
-#define MAYBE_UNUSED __attribute__((__unused__))
-
-/* Node structure. */
-struct trp_node {
-	uint32_t trpn_left;
-	uint32_t trpn_right;
-};
-
-/* Root structure. */
-struct trp_root {
-	uint32_t trp_root;
-};
-
-/* Pointer/Offset conversion. */
-#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
-#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
-#define trpn_modify(a_base, a_offset) \
-	do { \
-		if ((a_offset) < a_base##_pool.committed) { \
-			uint32_t old_offset = (a_offset);\
-			(a_offset) = a_base##_alloc(1); \
-			*trpn_pointer(a_base, a_offset) = \
-				*trpn_pointer(a_base, old_offset); \
-		} \
-	} while (0)
-
-/* Left accessors. */
-#define trp_left_get(a_base, a_field, a_node) \
-	(trpn_pointer(a_base, a_node)->a_field.trpn_left)
-#define trp_left_set(a_base, a_field, a_node, a_left) \
-	do { \
-		trpn_modify(a_base, a_node); \
-		trp_left_get(a_base, a_field, a_node) = (a_left); \
-	} while (0)
-
-/* Right accessors. */
-#define trp_right_get(a_base, a_field, a_node) \
-	(trpn_pointer(a_base, a_node)->a_field.trpn_right)
-#define trp_right_set(a_base, a_field, a_node, a_right) \
-	do { \
-		trpn_modify(a_base, a_node); \
-		trp_right_get(a_base, a_field, a_node) = (a_right); \
-	} while (0)
-
-/*
- * Fibonacci hash function.
- * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
- * See Knuth §6.4: volume 3, 3rd ed, p518.
- */
-#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node))
-
-/* Priority accessors. */
-#define trp_prio_get(a_node) trpn_hash(a_node)
-
-/* Node initializer. */
-#define trp_node_new(a_base, a_field, a_node) \
-	do { \
-		trp_left_set(a_base, a_field, (a_node), ~0); \
-		trp_right_set(a_base, a_field, (a_node), ~0); \
-	} while (0)
-
-/* Internal utility macros. */
-#define trpn_first(a_base, a_field, a_root, r_node) \
-	do { \
-		(r_node) = (a_root); \
-		if ((r_node) == ~0) \
-			return NULL; \
-		while (~trp_left_get(a_base, a_field, (r_node))) \
-			(r_node) = trp_left_get(a_base, a_field, (r_node)); \
-	} while (0)
-
-#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
-	do { \
-		(r_node) = trp_right_get(a_base, a_field, (a_node)); \
-		trp_right_set(a_base, a_field, (a_node), \
-			trp_left_get(a_base, a_field, (r_node))); \
-		trp_left_set(a_base, a_field, (r_node), (a_node)); \
-	} while (0)
-
-#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
-	do { \
-		(r_node) = trp_left_get(a_base, a_field, (a_node)); \
-		trp_left_set(a_base, a_field, (a_node), \
-			trp_right_get(a_base, a_field, (r_node))); \
-		trp_right_set(a_base, a_field, (r_node), (a_node)); \
-	} while (0)
-
-#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
-a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
-{ \
-	uint32_t ret; \
-	trpn_first(a_base, a_field, treap->trp_root, ret); \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
-{ \
-	uint32_t ret; \
-	uint32_t offset = trpn_offset(a_base, node); \
-	if (~trp_right_get(a_base, a_field, offset)) { \
-		trpn_first(a_base, a_field, \
-			trp_right_get(a_base, a_field, offset), ret); \
-	} else { \
-		uint32_t tnode = treap->trp_root; \
-		ret = ~0; \
-		while (1) { \
-			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
-				trpn_pointer(a_base, tnode)); \
-			if (cmp < 0) { \
-				ret = tnode; \
-				tnode = trp_left_get(a_base, a_field, tnode); \
-			} else if (cmp > 0) { \
-				tnode = trp_right_get(a_base, a_field, tnode); \
-			} else { \
-				break; \
-			} \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
-{ \
-	int cmp; \
-	uint32_t ret = treap->trp_root; \
-	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
-		if (cmp < 0) { \
-			ret = trp_left_get(a_base, a_field, ret); \
-		} else { \
-			ret = trp_right_get(a_base, a_field, ret); \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
-{ \
-	int cmp; \
-	uint32_t ret = treap->trp_root; \
-	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \
-		if (cmp < 0) { \
-			if (!~trp_left_get(a_base, a_field, ret)) \
-				break; \
-			ret = trp_left_get(a_base, a_field, ret); \
-		} else { \
-			ret = trp_right_get(a_base, a_field, ret); \
-		} \
-	} \
-	return trpn_pointer(a_base, ret); \
-} \
-a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
-{ \
-	if (cur_node == ~0) { \
-		return ins_node; \
-	} else { \
-		uint32_t ret; \
-		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
-					trpn_pointer(a_base, cur_node)); \
-		if (cmp < 0) { \
-			uint32_t left = a_pre##insert_recurse( \
-				trp_left_get(a_base, a_field, cur_node), ins_node); \
-			trp_left_set(a_base, a_field, cur_node, left); \
-			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
-				trpn_rotate_right(a_base, a_field, cur_node, ret); \
-			else \
-				ret = cur_node; \
-		} else { \
-			uint32_t right = a_pre##insert_recurse( \
-				trp_right_get(a_base, a_field, cur_node), ins_node); \
-			trp_right_set(a_base, a_field, cur_node, right); \
-			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
-				trpn_rotate_left(a_base, a_field, cur_node, ret); \
-			else \
-				ret = cur_node; \
-		} \
-		return ret; \
-	} \
-} \
-a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
-{ \
-	uint32_t offset = trpn_offset(a_base, node); \
-	trp_node_new(a_base, a_field, offset); \
-	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
-	return trpn_pointer(a_base, offset); \
-} \
-a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
-{ \
-	int cmp = a_cmp(trpn_pointer(a_base, rem_node), \
-			trpn_pointer(a_base, cur_node)); \
-	if (cmp == 0) { \
-		uint32_t ret; \
-		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
-		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
-		if (left == ~0) { \
-			if (right == ~0) \
-				return ~0; \
-		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
-			trpn_rotate_right(a_base, a_field, cur_node, ret); \
-			right = a_pre##remove_recurse(cur_node, rem_node); \
-			trp_right_set(a_base, a_field, ret, right); \
-			return ret; \
-		} \
-		trpn_rotate_left(a_base, a_field, cur_node, ret); \
-		left = a_pre##remove_recurse(cur_node, rem_node); \
-		trp_left_set(a_base, a_field, ret, left); \
-		return ret; \
-	} else if (cmp < 0) { \
-		uint32_t left = a_pre##remove_recurse( \
-			trp_left_get(a_base, a_field, cur_node), rem_node); \
-		trp_left_set(a_base, a_field, cur_node, left); \
-		return cur_node; \
-	} else { \
-		uint32_t right = a_pre##remove_recurse( \
-			trp_right_get(a_base, a_field, cur_node), rem_node); \
-		trp_right_set(a_base, a_field, cur_node, right); \
-		return cur_node; \
-	} \
-} \
-a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
-{ \
-	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
-		trpn_offset(a_base, node)); \
-} \
-
-#endif
diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt
deleted file mode 100644
index 5ca6b42..0000000
--- a/vcs-svn/trp.txt
+++ /dev/null
@@ -1,109 +0,0 @@
-Motivation
-==========
-
-Treaps provide a memory-efficient binary search tree structure.
-Insertion/deletion/search are about as about as fast in the average
-case as red-black trees and the chances of worst-case behavior are
-vanishingly small, thanks to (pseudo-)randomness.  The bad worst-case
-behavior is a small price to pay, given that treaps are much simpler
-to implement.
-
-API
-===
-
-The trp API generates a data structure and functions to handle a
-large growing set of objects stored in a pool.
-
-The caller:
-
-. Specifies parameters for the generated functions with the
-  trp_gen(static, foo_, ...) macro.
-
-. Allocates a `struct trp_root` variable and sets it to {~0}.
-
-. Adds new nodes to the set using `foo_insert`.  Any pointers
-  to existing nodes cannot be relied upon any more, so the caller
-  might retrieve them anew with `foo_pointer`.
-
-. Can find a specific item in the set using `foo_search`.
-
-. Can iterate over items in the set using `foo_first` and `foo_next`.
-
-. Can remove an item from the set using `foo_remove`.
-
-Example:
-
-----
-struct ex_node {
-	const char *s;
-	struct trp_node ex_link;
-};
-static struct trp_root ex_base = {~0};
-obj_pool_gen(ex, struct ex_node, 4096);
-trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp)
-struct ex_node *item;
-
-item = ex_pointer(ex_alloc(1));
-item->s = "hello";
-ex_insert(&ex_base, item);
-item = ex_pointer(ex_alloc(1));
-item->s = "goodbye";
-ex_insert(&ex_base, item);
-for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
-	printf("%s\n", item->s);
-----
-
-Functions
----------
-
-trp_gen(attr, foo_, node_type, link_field, pool, cmp)::
-
-	Generate a type-specific treap implementation.
-+
-. The storage class for generated functions will be 'attr' (e.g., `static`).
-. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
-. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
-  This type must be a struct with at least one `struct trp_node` field
-  to point to its children.
-. The field used to access child nodes will be 'link_field'.
-. All treap nodes must lie in the 'pool' object pool.
-. Treap nodes must be totally ordered by the 'cmp' relation, with the
-  following prototype:
-+
-int (*cmp)(node_type \*a, node_type \*b)
-+
-and returning a value less than, equal to, or greater than zero
-according to the result of comparison.
-
-node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node)::
-
-	Insert node into treap.  If inserted multiple times,
-	a node will appear in the treap multiple times.
-+
-The return value is the address of the node within the treap,
-which might differ from `node` if `pool_alloc` had to call
-`realloc` to expand the pool.
-
-void foo_remove(struct trp_root *treap, node_type \*node)::
-
-	Remove node from treap.  Caller must ensure node is
-	present in treap before using this function.
-
-node_type *foo_search(struct trp_root \*treap, node_type \*key)::
-
-	Search for a node that matches key.  If no match is found,
-	result is NULL.
-
-node_type *foo_nsearch(struct trp_root \*treap, node_type \*key)::
-
-	Like `foo_search`, but if if the key is missing return what
-	would be key's successor, were key in treap (NULL if no
-	successor).
-
-node_type *foo_first(struct trp_root \*treap)::
-
-	Find the first item from the treap, in sorted order.
-
-node_type *foo_next(struct trp_root \*treap, node_type \*node)::
-
-	Find the next item.
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 10/11] vcs-svn: drop obj_pool.h
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (8 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 09/11] vcs-svn: drop trp.h David Barr
@ 2011-03-21 23:49   ` David Barr
  2011-03-21 23:50   ` [PATCH 11/11] vcs-svn: use strchr to find RFC822 delimiter David Barr
  2011-03-23  0:32   ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:49 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 .gitignore         |    1 -
 Makefile           |    4 +-
 t/t0080-vcs-svn.sh |   79 -----------------------------------
 test-obj-pool.c    |  116 ----------------------------------------------------
 vcs-svn/obj_pool.h |   61 ---------------------------
 5 files changed, 1 insertions(+), 260 deletions(-)
 delete mode 100755 t/t0080-vcs-svn.sh
 delete mode 100644 test-obj-pool.c
 delete mode 100644 vcs-svn/obj_pool.h

diff --git a/.gitignore b/.gitignore
index aa94ff1..789f922 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,7 +171,6 @@
 /test-line-buffer
 /test-match-trees
 /test-mktemp
-/test-obj-pool
 /test-parse-options
 /test-path-utils
 /test-run-command
diff --git a/Makefile b/Makefile
index a2cadc5..b802ae9 100644
--- a/Makefile
+++ b/Makefile
@@ -424,7 +424,6 @@ TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
-TEST_PROGRAMS_NEED_X += test-obj-pool
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
@@ -1838,7 +1837,7 @@ XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
 VCSSVN_OBJS = vcs-svn/line_buffer.o vcs-svn/repo_tree.o \
 	vcs-svn/fast_export.o vcs-svn/svndump.o
-VCSSVN_TEST_OBJS = test-obj-pool.o test-line-buffer.o
+VCSSVN_TEST_OBJS = test-line-buffer.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1962,7 +1961,6 @@ xdiff-interface.o $(XDIFF_OBJS): \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
 $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \
-	vcs-svn/obj_pool.h \
 	vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
 	vcs-svn/svndump.h
 
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
deleted file mode 100755
index 3f29496..0000000
--- a/t/t0080-vcs-svn.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/sh
-
-test_description='check infrastructure for svn importer'
-
-. ./test-lib.sh
-uint32_max=4294967295
-
-test_expect_success 'obj pool: store data' '
-	cat <<-\EOF >expected &&
-	0
-	1
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 16
-	set one 13
-	test one 13
-	reset one
-	EOF
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: NULL is offset ~0' '
-	echo "$uint32_max" >expected &&
-	echo null one | test-obj-pool >actual &&
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: out-of-bounds access' '
-	cat <<-EOF >expected &&
-	0
-	0
-	$uint32_max
-	$uint32_max
-	16
-	20
-	$uint32_max
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 16
-	alloc two 16
-	offset one 20
-	offset two 20
-	alloc one 5
-	offset one 20
-	free one 1
-	offset one 20
-	reset one
-	reset two
-	EOF
-	test_cmp expected actual
-'
-
-test_expect_success 'obj pool: high-water mark' '
-	cat <<-\EOF >expected &&
-	0
-	0
-	10
-	20
-	20
-	20
-	EOF
-
-	test-obj-pool <<-\EOF >actual &&
-	alloc one 10
-	committed one
-	alloc one 10
-	commit one
-	committed one
-	alloc one 10
-	free one 20
-	committed one
-	reset one
-	EOF
-	test_cmp expected actual
-'
-
-test_done
diff --git a/test-obj-pool.c b/test-obj-pool.c
deleted file mode 100644
index 5018863..0000000
--- a/test-obj-pool.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * test-obj-pool.c: code to exercise the svn importer's object pool
- */
-
-#include "cache.h"
-#include "vcs-svn/obj_pool.h"
-
-enum pool { POOL_ONE, POOL_TWO };
-obj_pool_gen(one, int, 1)
-obj_pool_gen(two, int, 4096)
-
-static uint32_t strtouint32(const char *s)
-{
-	char *end;
-	uintmax_t n = strtoumax(s, &end, 10);
-	if (*s == '\0' || (*end != '\n' && *end != '\0'))
-		die("invalid offset: %s", s);
-	return (uint32_t) n;
-}
-
-static void handle_command(const char *command, enum pool pool, const char *arg)
-{
-	switch (*command) {
-	case 'a':
-		if (!prefixcmp(command, "alloc ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_alloc(n) : two_alloc(n));
-			return;
-		}
-	case 'c':
-		if (!prefixcmp(command, "commit ")) {
-			pool == POOL_ONE ? one_commit() : two_commit();
-			return;
-		}
-		if (!prefixcmp(command, "committed ")) {
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_pool.committed : two_pool.committed);
-			return;
-		}
-	case 'f':
-		if (!prefixcmp(command, "free ")) {
-			uint32_t n = strtouint32(arg);
-			pool == POOL_ONE ? one_free(n) : two_free(n);
-			return;
-		}
-	case 'n':
-		if (!prefixcmp(command, "null ")) {
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_offset(NULL) : two_offset(NULL));
-			return;
-		}
-	case 'o':
-		if (!prefixcmp(command, "offset ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%"PRIu32"\n",
-				pool == POOL_ONE ?
-				one_offset(one_pointer(n)) :
-				two_offset(two_pointer(n)));
-			return;
-		}
-	case 'r':
-		if (!prefixcmp(command, "reset ")) {
-			pool == POOL_ONE ? one_reset() : two_reset();
-			return;
-		}
-	case 's':
-		if (!prefixcmp(command, "set ")) {
-			uint32_t n = strtouint32(arg);
-			if (pool == POOL_ONE)
-				*one_pointer(n) = 1;
-			else
-				*two_pointer(n) = 1;
-			return;
-		}
-	case 't':
-		if (!prefixcmp(command, "test ")) {
-			uint32_t n = strtouint32(arg);
-			printf("%d\n", pool == POOL_ONE ?
-				*one_pointer(n) : *two_pointer(n));
-			return;
-		}
-	default:
-		die("unrecognized command: %s", command);
-	}
-}
-
-static void handle_line(const char *line)
-{
-	const char *arg = strchr(line, ' ');
-	enum pool pool;
-
-	if (arg && !prefixcmp(arg + 1, "one"))
-		pool = POOL_ONE;
-	else if (arg && !prefixcmp(arg + 1, "two"))
-		pool = POOL_TWO;
-	else
-		die("no pool specified: %s", line);
-
-	handle_command(line, pool, arg + strlen("one "));
-}
-
-int main(int argc, char *argv[])
-{
-	struct strbuf sb = STRBUF_INIT;
-	if (argc != 1)
-		usage("test-obj-str < script");
-
-	while (strbuf_getline(&sb, stdin, '\n') != EOF)
-		handle_line(sb.buf);
-	strbuf_release(&sb);
-	return 0;
-}
diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h
deleted file mode 100644
index deb6eb8..0000000
--- a/vcs-svn/obj_pool.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed under a two-clause BSD-style license.
- * See LICENSE for details.
- */
-
-#ifndef OBJ_POOL_H_
-#define OBJ_POOL_H_
-
-#include "git-compat-util.h"
-
-#define MAYBE_UNUSED __attribute__((__unused__))
-
-#define obj_pool_gen(pre, obj_t, initial_capacity) \
-static struct { \
-	uint32_t committed; \
-	uint32_t size; \
-	uint32_t capacity; \
-	obj_t *base; \
-} pre##_pool = {0, 0, 0, NULL}; \
-static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \
-{ \
-	uint32_t offset; \
-	if (pre##_pool.size + count > pre##_pool.capacity) { \
-		while (pre##_pool.size + count > pre##_pool.capacity) \
-			if (pre##_pool.capacity) \
-				pre##_pool.capacity *= 2; \
-			else \
-				pre##_pool.capacity = initial_capacity; \
-		pre##_pool.base = realloc(pre##_pool.base, \
-					pre##_pool.capacity * sizeof(obj_t)); \
-	} \
-	offset = pre##_pool.size; \
-	pre##_pool.size += count; \
-	return offset; \
-} \
-static MAYBE_UNUSED void pre##_free(uint32_t count) \
-{ \
-	pre##_pool.size -= count; \
-} \
-static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \
-{ \
-	return obj == NULL ? ~0 : obj - pre##_pool.base; \
-} \
-static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \
-{ \
-	return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \
-} \
-static MAYBE_UNUSED void pre##_commit(void) \
-{ \
-	pre##_pool.committed = pre##_pool.size; \
-} \
-static MAYBE_UNUSED void pre##_reset(void) \
-{ \
-	free(pre##_pool.base); \
-	pre##_pool.base = NULL; \
-	pre##_pool.size = 0; \
-	pre##_pool.capacity = 0; \
-	pre##_pool.committed = 0; \
-}
-
-#endif
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 11/11] vcs-svn: use strchr to find RFC822 delimiter
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (9 preceding siblings ...)
  2011-03-21 23:49   ` [PATCH 10/11] vcs-svn: drop obj_pool.h David Barr
@ 2011-03-21 23:50   ` David Barr
  2011-03-23  0:32   ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
  11 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-21 23:50 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Sverre Rabbelier,
	Sam Vilain, Stephen Bash, Tomas Carnecky, David Barr

This is a small optimisation (4% reduction in user time) but is the largest
artifact within the parsing portion of svndump.c

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 vcs-svn/svndump.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 897349e..88abf60 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -301,10 +301,12 @@ void svndump_read(const char *url)
 
 	reset_dump_ctx(url);
 	while ((t = buffer_read_line(&input))) {
-		val = strstr(t, ": ");
+		val = strchr(t, ':');
 		if (!val)
 			continue;
 		*val++ = '\0';
+		if (*val != ' ')
+			continue;
 		*val++ = '\0';
 
 		/* strlen(key) + 1 */
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
                     ` (10 preceding siblings ...)
  2011-03-21 23:50   ` [PATCH 11/11] vcs-svn: use strchr to find RFC822 delimiter David Barr
@ 2011-03-23  0:32   ` Jonathan Nieder
  2011-03-23  5:46     ` Junio C Hamano
  11 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-23  0:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, David Barr, Ramkumar Ramachandra

(culled cc list)
Hi Junio,

Please pull

  git://repo.or.cz/git/jrn.git svn-fe

to get the following changes on top of master.

These are the patches from David's recent code-purge series that do
not require incremental import support.  I'd like to push out
incremental import support soon, too, but since that makes svn-fe
require feedback from fast-import as it runs, it would be nice to
provide some simple wrapper script to set everything up at the same
time to avoid inconveniencing users too much.

The main impact of the patches currently in svn-fe should be to
improve error handling a little.

David Barr wrote:

> Patch 6 follows the spirit of patches 4 and 5, for a consistent
> approach to switching on constant strings.

I've skipped this one and applied the rest.  You can see the result
in the svn-fe-pu branch.

Thoughts, suggestions, improvements welcome as always.

David Barr (5):
      vcs-svn: use strbuf for revision log
      vcs-svn: use strbuf for author, UUID, and URL
      vcs-svn: implement perfect hash for node-prop keys
      vcs-svn: implement perfect hash for top-level keys
      vcs-svn: use strchr to find RFC822 delimiter

Jonathan Nieder (9):
      vcs-svn: introduce repo_read_path to check the content at a path
      vcs-svn: handle_node: use repo_read_path
      vcs-svn: simplify repo_modify_path and repo_copy
      vcs-svn: allow input errors to be detected promptly
      vcs-svn: improve support for reading large files
      vcs-svn: make buffer_skip_bytes return length read
      vcs-svn: make buffer_copy_bytes return length read
      vcs-svn: improve reporting of input errors
      Merge branch 'db/length-as-hash' into svn-fe

 vcs-svn/fast_export.c   |   27 +++--
 vcs-svn/fast_export.h   |    5 +-
 vcs-svn/line_buffer.c   |   36 ++++---
 vcs-svn/line_buffer.h   |    6 +-
 vcs-svn/line_buffer.txt |    3 +-
 vcs-svn/repo_tree.c     |   43 ++++---
 vcs-svn/repo_tree.h     |   10 +-
 vcs-svn/svndump.c       |  307 +++++++++++++++++++++++++++++------------------
 8 files changed, 265 insertions(+), 172 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-23  0:32   ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
@ 2011-03-23  5:46     ` Junio C Hamano
  2011-03-23  6:03       ` Junio C Hamano
                         ` (3 more replies)
  0 siblings, 4 replies; 72+ messages in thread
From: Junio C Hamano @ 2011-03-23  5:46 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Barr, Ramkumar Ramachandra

Jonathan Nieder <jrnieder@gmail.com> writes:

> Please pull
>
>   git://repo.or.cz/git/jrn.git svn-fe
>
> to get the following changes on top of master.

Done.

I only gave a cursory look at "git diff ORIG_HEAD" output immediately
after pulling, but I found that the majority of lines deleted were of
questionable style and the added ones looked more like normal C ;-)

Except for

	switch (keylen + 1) {
        case sizeof("constant string"):
        	...
                break;
        case sizeof("another constant string"):
        	...
	}

which looked a bit unusual. But mistakes in this construct can be easily
caught by the compiler that would notice duplicated case labels, so it
probably is not so brittle as it first looks.

By the way, I've been getting annoyed by these three "sleep 100" getting
stuck in t0081 and spending their sweet timeout while running my tests
(prove is on, "make test </dev/null" to forbid it from reading my stdin).
I see attempts to kill them early with "kill $!" but apparently they are
not working. Can you take a look at it?

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-23  5:46     ` Junio C Hamano
@ 2011-03-23  6:03       ` Junio C Hamano
  2011-03-26  6:42         ` Jonathan Nieder
  2011-03-23  7:11       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements David Barr
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 72+ messages in thread
From: Junio C Hamano @ 2011-03-23  6:03 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, Ævar Arnfjörð Bjarmason

Junio C Hamano <gitster@pobox.com> writes:

> By the way, I've been getting annoyed by these three "sleep 100" getting
> stuck in t0081 and spending their sweet timeout while running my tests
> (prove is on, "make test </dev/null" to forbid it from reading my stdin).
> I see attempts to kill them early with "kill $!" but apparently they are
> not working. Can you take a look at it?

One more data point.  The first one works just fine, while the second one
gets stuck:

    $ cd t && make T=t0081-line-buffer.sh test
    $ cd t && make T=t0081-line-buffer.sh prove

I notice that [sh] spawned from prove is orphaned.

21643 pts/9    Ss     0:00      \_ bash
17149 pts/9    S+     0:00      |   \_ make T=t0081-line-buffer.sh prove
17155 pts/9    S+     0:00      |       \_ /bin/sh -c echo "*** prove ***"; GIT_CONFIG=.git/
17156 pts/9    S+     0:00      |           \_ /usr/bin/perl /usr/bin/prove --exec /bin/sh t
17157 pts/9    Z+     0:00      |               \_ [sh] <defunct>

Is this a known bug in prove perhaps?

    $ /usr/bin/prove --version
    TAP::Harness v3.17 and Perl v5.10.1

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-23  5:46     ` Junio C Hamano
  2011-03-23  6:03       ` Junio C Hamano
@ 2011-03-23  7:11       ` David Barr
  2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
  2011-03-26  6:46       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
  3 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-23  7:11 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, Git Mailing List, David Barr, Ramkumar Ramachandra

Hi,

Junio C Hamano wrote:

> Jonathan Nieder <jrnieder@gmail.com> writes:
> 
>> Please pull
>> 
>>  git://repo.or.cz/git/jrn.git svn-fe
>> 
>> to get the following changes on top of master.
> 
> Done.
> 
> I only gave a cursory look at "git diff ORIG_HEAD" output immediately
> after pulling, but I found that the majority of lines deleted were of
> questionable style and the added ones looked more like normal C ;-)
> 
> Except for
> 
> 	switch (keylen + 1) {
>        case sizeof("constant string"):
>        	...
>                break;
>        case sizeof("another constant string"):
>        	...
> 	}
> 
> which looked a bit unusual. But mistakes in this construct can be easily
> caught by the compiler that would notice duplicated case labels, so it
> probably is not so brittle as it first looks.

Agreed, it is quite an odd construct but it took quite a bit of refinement to
arrive there. It is a compromise between moderately readable and
reasonably fast. I did play around with perfect hash generators prior to
settling on this approach. Ditto re compile time checking.

Jonathan, I suppose I should set up a performance test for these patches
because my first thought was "I wonder what these buy us on their own?"

--
David Barr.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH] fixup! vcs-svn: improve reporting of input errors
  2011-03-23  5:46     ` Junio C Hamano
  2011-03-23  6:03       ` Junio C Hamano
  2011-03-23  7:11       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements David Barr
@ 2011-03-24 12:43       ` David Barr
  2011-03-25  1:12         ` Jonathan Nieder
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
  2011-03-26  6:46       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
  3 siblings, 2 replies; 72+ messages in thread
From: David Barr @ 2011-03-24 12:43 UTC (permalink / raw)
  To: Git Mailing List
  Cc: Jonathan Nieder, Ramkumar Ramachandra, Junio C Hamano, David Barr

An excessive constraint was introduced in c9d1c8ba; when reading
svn props, it is permissible for both keys and values to contain
nul characters. Thus the test `strlen(val) != len` may fail on
such properties. This caused svn-fe to die early whilst handling
revision 59151 of the ASF repository.
---
 vcs-svn/svndump.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index ea5b128..9bd4fb2 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -171,7 +171,7 @@ static void read_props(void)
 			die("invalid property line: %s\n", t);
 		len = atoi(&t[2]);
 		val = buffer_read_string(&input, len);
-		if (!val || strlen(val) != len)
+		if (!val)
 			die_short_read();
 
 		/* Discard trailing newline. */
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* Re: [PATCH] fixup! vcs-svn: improve reporting of input errors
  2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
@ 2011-03-25  1:12         ` Jonathan Nieder
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
  1 sibling, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  1:12 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

Hi David,

David Barr wrote:

> An excessive constraint was introduced in c9d1c8ba; when reading
> svn props, it is permissible for both keys and values to contain
> nul characters.

Yes, that will work.

buffer_read_string returns a '\0'-terminated string and on early EOF,
the part after the end-of-file will be gibberish (and probably
uninitialized).  But it lives in a 1000-char buffer and errors out
when it doesn't fit, so at least with the fix it wouldn't crash.

Sorry for the breakage.  I suppose a test like the following would
catch future problems of this kind?

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 t/t9010-svn-fe.sh |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
index 5a6a4b9..a91b59c 100755
--- a/t/t9010-svn-fe.sh
+++ b/t/t9010-svn-fe.sh
@@ -370,6 +370,85 @@ test_expect_failure 'change file mode but keep old content' '
 	test_cmp hello actual.target
 '
 
+test_expect_failure 'null bytes' '
+	# Caveat: svnadmin 1.6.16 (r1073529) truncates at \0 in the
+	# svn:specialQnotreally example.
+	reinit_git &&
+	cat >expect <<-\EOF &&
+	OBJID
+	:100644 100644 OBJID OBJID M	greeting
+	OBJID
+	:000000 100644 OBJID OBJID A	greeting
+	EOF
+	printf "%s\n" "something with a null byte (Q)" |
+		q_to_nul >expect.message &&
+	printf "%s\n" "helQo" |
+		q_to_nul >expect.hello1 &&
+	printf "%s\n" "link hello" >expect.hello2 &&
+	{
+		properties svn:log "something with a null byte (Q)" &&
+		echo PROPS-END
+	} |
+	q_to_nul >props &&
+	{
+		q_to_nul <<-\EOF &&
+		SVN-fs-dump-format-version: 3
+
+		Revision-number: 1
+		Prop-content-length: 10
+		Content-length: 10
+
+		PROPS-END
+
+		Node-path: greeting
+		Node-kind: file
+		Node-action: add
+		Prop-content-length: 10
+		Text-content-length: 6
+		Content-length: 16
+
+		PROPS-END
+		helQo
+
+		Revision-number: 2
+		EOF
+		echo Prop-content-length: $(wc -c <props) &&
+		echo Content-length: $(wc -c <props) &&
+		echo &&
+		cat props &&
+		q_to_nul <<-\EOF
+
+		Node-path: greeting
+		Node-kind: file
+		Node-action: change
+		Prop-content-length: 43
+		Text-content-length: 11
+		Content-length: 54
+
+		K 21
+		svn:specialQnotreally
+		V 1
+		*
+		PROPS-END
+		link hello
+		EOF
+	} >8bitclean.dump &&
+	test-svn-fe 8bitclean.dump >stream &&
+	git fast-import <stream &&
+	{
+		git rev-list HEAD |
+		git diff-tree --root --stdin |
+		sed "s/$_x40/OBJID/g"
+	} >actual &&
+	git diff-tree --always -s --format=%s HEAD >actual.message &&
+	git cat-file blob HEAD^:greeting >actual.hello1 &&
+	git cat-file blob HEAD:greeting >actual.hello2 &&
+	test_cmp expect actual &&
+	test_cmp expect.message actual.message &&
+	test_cmp expect.hello1 actual.hello1 &&
+	test_cmp expect.hello2 actual.hello2
+'
+
 test_expect_success 'change file mode and reiterate content' '
 	reinit_git &&
 	cat >expect <<-\EOF &&
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH svn-fe 0/4] vcs-svn: null bytes in properties
  2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
  2011-03-25  1:12         ` Jonathan Nieder
@ 2011-03-25  3:34         ` Jonathan Nieder
  2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
                             ` (3 more replies)
  1 sibling, 4 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  3:34 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

David Barr wrote:

> it is permissible for both keys and values to contain
> nul characters.

You're right --- it's a regression to error out, though we never did
support it all that well.  How about this?

This doesn't take care of preserving embedded null bytes in the author
name.  That can come another day, I suppose.

Jonathan Nieder (4):
  vcs-svn: make reading of properties binary-safe
  vcs-svn: remove buffer_read_string
  vcs-svn: avoid unnecessary copying of log message and author
  vcs-svn: handle log message with embedded null bytes

 t/t0081-line-buffer.sh  |   35 ++++++----------
 t/t9010-svn-fe.sh       |  104 +++++++++++++++++++++++++++++++++++++++++++++++
 test-line-buffer.c      |    6 ---
 vcs-svn/fast_export.c   |   12 +++--
 vcs-svn/fast_export.h   |    9 ++--
 vcs-svn/line_buffer.c   |    8 ----
 vcs-svn/line_buffer.h   |    4 +-
 vcs-svn/line_buffer.txt |   12 +----
 vcs-svn/repo_tree.c     |    5 +-
 vcs-svn/repo_tree.h     |    4 +-
 vcs-svn/svndump.c       |   42 +++++++++----------
 11 files changed, 157 insertions(+), 84 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH 1/4] vcs-svn: make reading of properties binary-safe
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
@ 2011-03-25  4:07           ` Jonathan Nieder
  2011-03-28 15:34             ` tb
  2011-03-25  4:09           ` [PATCH 2/4] vcs-svn: remove buffer_read_string Jonathan Nieder
                             ` (2 subsequent siblings)
  3 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  4:07 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

A caller to buffer_read_string cannot easily tell the difference
between the string "foo" followed by an early end of file and the
string "foo\0bar\0baz".  In a half-hearted attempt to catch early EOF,
c9d1c8ba (2010-12-28) introduced a safety strlen(val) == len for
property keys and values, to at least keep svn-fe from reading
uninitialized data when a property list ends early due to EOF.

But it is permissible for both keys and values to contain null
characters, so in handling revision 59151 of the ASF repository svn-fe
encounters a null byte and produces the following message:

 fatal: invalid dump: unexpected end of file

Fix it by using buffer_read_binary to read to a strbuf (and keep track
of the actual length read).  Most consumers of properties still use
C-style strings, so in practice we still can't use an author or log
message with embedded nuls, but at least this way svn-fe won't error
out.

Reported-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 t/t9010-svn-fe.sh |   27 +++++++++++++++++++++++++++
 vcs-svn/svndump.c |   24 ++++++++++--------------
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
index 5a6a4b9..47f1e4f 100755
--- a/t/t9010-svn-fe.sh
+++ b/t/t9010-svn-fe.sh
@@ -370,6 +370,33 @@ test_expect_failure 'change file mode but keep old content' '
 	test_cmp hello actual.target
 '
 
+test_expect_success 'null byte in property value' '
+	reinit_git &&
+	echo "commit message" >expect.message &&
+	{
+		properties \
+			unimportant "something with a null byte (Q)" \
+			svn:log "commit message"&&
+		echo PROPS-END
+	} |
+	q_to_nul >props &&
+	{
+		cat <<-\EOF &&
+		SVN-fs-dump-format-version: 3
+
+		Revision-number: 1
+		EOF
+		echo Prop-content-length: $(wc -c <props) &&
+		echo Content-length: $(wc -c <props) &&
+		echo &&
+		cat props
+	} >nullprop.dump &&
+	test-svn-fe nullprop.dump >stream &&
+	git fast-import <stream &&
+	git diff-tree --always -s --format=%s HEAD >actual.message &&
+	test_cmp expect.message actual.message
+'
+
 test_expect_success 'change file mode and reiterate content' '
 	reinit_git &&
 	cat >expect <<-\EOF &&
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index ea5b128..c00f031 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -147,6 +147,7 @@ static void die_short_read(void)
 static void read_props(void)
 {
 	static struct strbuf key = STRBUF_INIT;
+	static struct strbuf val = STRBUF_INIT;
 	const char *t;
 	/*
 	 * NEEDSWORK: to support simple mode changes like
@@ -163,15 +164,15 @@ static void read_props(void)
 	uint32_t type_set = 0;
 	while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
 		uint32_t len;
-		const char *val;
 		const char type = t[0];
 		int ch;
 
 		if (!type || t[1] != ' ')
 			die("invalid property line: %s\n", t);
 		len = atoi(&t[2]);
-		val = buffer_read_string(&input, len);
-		if (!val || strlen(val) != len)
+		strbuf_reset(&val);
+		buffer_read_binary(&input, &val, len);
+		if (val.len < len)
 			die_short_read();
 
 		/* Discard trailing newline. */
@@ -179,22 +180,17 @@ static void read_props(void)
 		if (ch == EOF)
 			die_short_read();
 		if (ch != '\n')
-			die("invalid dump: expected newline after %s", val);
+			die("invalid dump: expected newline after %s", val.buf);
 
 		switch (type) {
 		case 'K':
+			strbuf_swap(&key, &val);
+			continue;
 		case 'D':
-			strbuf_reset(&key);
-			if (val)
-				strbuf_add(&key, val, len);
-			if (type == 'K')
-				continue;
-			assert(type == 'D');
-			val = NULL;
-			len = 0;
-			/* fall through */
+			handle_property(&val, NULL, 0, &type_set);
+			continue;
 		case 'V':
-			handle_property(&key, val, len, &type_set);
+			handle_property(&key, val.buf, len, &type_set);
 			strbuf_reset(&key);
 			continue;
 		default:
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 2/4] vcs-svn: remove buffer_read_string
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
  2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
@ 2011-03-25  4:09           ` Jonathan Nieder
  2011-03-25  4:10           ` [PATCH 3/4] vcs-svn: avoid unnecessary copying of log message and author Jonathan Nieder
  2011-03-25  4:11           ` [PATCH 4/4] vcs-svn: handle log message with embedded null bytes Jonathan Nieder
  3 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  4:09 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

All previous users of buffer_read_string have already been converted
to use the more intuitive buffer_read_binary, so remove the old API to
avoid some confusion.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 t/t0081-line-buffer.sh  |   35 +++++++++++++----------------------
 test-line-buffer.c      |    6 ------
 vcs-svn/line_buffer.c   |    8 --------
 vcs-svn/line_buffer.h   |    4 +---
 vcs-svn/line_buffer.txt |   12 +++---------
 5 files changed, 17 insertions(+), 48 deletions(-)

diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh
index 550fad0..1dbe1c9 100755
--- a/t/t0081-line-buffer.sh
+++ b/t/t0081-line-buffer.sh
@@ -53,7 +53,7 @@ long_read_test () {
 		} >input &
 	} &&
 	test-line-buffer input <<-EOF >output &&
-	read $readsize
+	binary $readsize
 	copy $copysize
 	EOF
 	kill $! &&
@@ -71,23 +71,23 @@ test_expect_success 'setup: have pipes?' '
 '
 
 test_expect_success 'hello world' '
-	echo HELLO >expect &&
+	echo ">HELLO" >expect &&
 	test-line-buffer <<-\EOF >actual &&
-	read 6
+	binary 6
 	HELLO
 	EOF
 	test_cmp expect actual
 '
 
 test_expect_success PIPE '0-length read, no input available' '
-	>expect &&
+	printf ">" >expect &&
 	rm -f input &&
 	mkfifo input &&
 	{
 		sleep 100 >input &
 	} &&
 	test-line-buffer input <<-\EOF >actual &&
-	read 0
+	binary 0
 	copy 0
 	EOF
 	kill $! &&
@@ -95,9 +95,9 @@ test_expect_success PIPE '0-length read, no input available' '
 '
 
 test_expect_success '0-length read, send along greeting' '
-	echo HELLO >expect &&
+	echo ">HELLO" >expect &&
 	test-line-buffer <<-\EOF >actual &&
-	read 0
+	binary 0
 	copy 6
 	HELLO
 	EOF
@@ -105,7 +105,7 @@ test_expect_success '0-length read, send along greeting' '
 '
 
 test_expect_success PIPE '1-byte read, no input available' '
-	printf "%s" ab >expect &&
+	printf ">%s" ab >expect &&
 	rm -f input &&
 	mkfifo input &&
 	{
@@ -116,7 +116,7 @@ test_expect_success PIPE '1-byte read, no input available' '
 		} >input &
 	} &&
 	test-line-buffer input <<-\EOF >actual &&
-	read 1
+	binary 1
 	copy 1
 	EOF
 	kill $! &&
@@ -140,15 +140,6 @@ test_expect_success 'read from file descriptor' '
 	test_cmp expect actual
 '
 
-test_expect_success 'buffer_read_string copes with null byte' '
-	>expect &&
-	q_to_nul <<-\EOF | test-line-buffer >actual &&
-	read 2
-	Q
-	EOF
-	test_cmp expect actual
-'
-
 test_expect_success 'skip, copy null byte' '
 	echo Q | q_to_nul >expect &&
 	q_to_nul <<-\EOF | test-line-buffer >actual &&
@@ -170,18 +161,18 @@ test_expect_success 'read null byte' '
 '
 
 test_expect_success 'long reads are truncated' '
-	echo foo >expect &&
+	echo ">foo" >expect &&
 	test-line-buffer <<-\EOF >actual &&
-	read 5
+	binary 5
 	foo
 	EOF
 	test_cmp expect actual
 '
 
 test_expect_success 'long copies are truncated' '
-	printf "%s\n" "" foo >expect &&
+	printf "%s\n" ">" foo >expect &&
 	test-line-buffer <<-\EOF >actual &&
-	read 1
+	binary 1
 
 	copy 5
 	foo
diff --git a/test-line-buffer.c b/test-line-buffer.c
index 25b20b9..7ec9b13 100644
--- a/test-line-buffer.c
+++ b/test-line-buffer.c
@@ -32,12 +32,6 @@ static void handle_command(const char *command, const char *arg, struct line_buf
 			buffer_copy_bytes(buf, strtouint32(arg));
 			return;
 		}
-	case 'r':
-		if (!prefixcmp(command, "read ")) {
-			const char *s = buffer_read_string(buf, strtouint32(arg));
-			fputs(s, stdout);
-			return;
-		}
 	case 's':
 		if (!prefixcmp(command, "skip ")) {
 			buffer_skip_bytes(buf, strtouint32(arg));
diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index 33e733a..c390387 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -91,13 +91,6 @@ char *buffer_read_line(struct line_buffer *buf)
 	return buf->line_buffer;
 }
 
-char *buffer_read_string(struct line_buffer *buf, uint32_t len)
-{
-	strbuf_reset(&buf->blob_buffer);
-	strbuf_fread(&buf->blob_buffer, len, buf->infile);
-	return ferror(buf->infile) ? NULL : buf->blob_buffer.buf;
-}
-
 void buffer_read_binary(struct line_buffer *buf,
 				struct strbuf *sb, uint32_t size)
 {
@@ -134,5 +127,4 @@ off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes)
 
 void buffer_reset(struct line_buffer *buf)
 {
-	strbuf_release(&buf->blob_buffer);
 }
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index f5c468a..d0b22dd 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -7,10 +7,9 @@
 
 struct line_buffer {
 	char line_buffer[LINE_BUFFER_LEN];
-	struct strbuf blob_buffer;
 	FILE *infile;
 };
-#define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL}
+#define LINE_BUFFER_INIT { "", NULL }
 
 int buffer_init(struct line_buffer *buf, const char *filename);
 int buffer_fdinit(struct line_buffer *buf, int fd);
@@ -23,7 +22,6 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf);
 
 int buffer_ferror(struct line_buffer *buf);
 char *buffer_read_line(struct line_buffer *buf);
-char *buffer_read_string(struct line_buffer *buf, uint32_t len);
 int buffer_read_char(struct line_buffer *buf);
 void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
 /* Returns number of bytes read (not necessarily written). */
diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt
index 4ef0755..8e139eb 100644
--- a/vcs-svn/line_buffer.txt
+++ b/vcs-svn/line_buffer.txt
@@ -16,8 +16,8 @@ The calling program:
 
  - initializes a `struct line_buffer` to LINE_BUFFER_INIT
  - specifies a file to read with `buffer_init`
- - processes input with `buffer_read_line`, `buffer_read_string`,
-   `buffer_skip_bytes`, and `buffer_copy_bytes`
+ - processes input with `buffer_read_line`, `buffer_skip_bytes`,
+   and `buffer_copy_bytes`
  - closes the file with `buffer_deinit`, perhaps to start over and
    read another file.
 
@@ -37,7 +37,7 @@ the calling program.  A program
    the temporary file
  - declares writing is over with `buffer_tmpfile_prepare_to_read`
  - can re-read what was written with `buffer_read_line`,
-   `buffer_read_string`, and so on
+   `buffer_copy_bytes`, and so on
  - can reuse the temporary file by calling `buffer_tmpfile_rewind`
    again
  - removes the temporary file with `buffer_deinit`, perhaps to
@@ -64,12 +64,6 @@ Functions
 	Read a line and strip off the trailing newline.
 	On failure or end of file, returns NULL.
 
-`buffer_read_string`::
-	Read `len` characters of input or up to the end of the
-	file, whichever comes first.  Returns NULL on error.
-	Returns whatever characters were read (possibly "")
-	for end of file.
-
 `buffer_copy_bytes`::
 	Read `len` bytes of input and dump them to the standard output
 	stream.  Returns early for error or end of file.
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 3/4] vcs-svn: avoid unnecessary copying of log message and author
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
  2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
  2011-03-25  4:09           ` [PATCH 2/4] vcs-svn: remove buffer_read_string Jonathan Nieder
@ 2011-03-25  4:10           ` Jonathan Nieder
  2011-03-25  4:11           ` [PATCH 4/4] vcs-svn: handle log message with embedded null bytes Jonathan Nieder
  3 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  4:10 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

Use strbuf_swap when storing the svn:log and svn:author properties, so
pointers to the buffers, rather than their contents, get copied.  The
main effect should be to make the code a little easier to read.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 vcs-svn/svndump.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index c00f031..88ecef1 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -83,7 +83,7 @@ static void reset_dump_ctx(const char *url)
 }
 
 static void handle_property(const struct strbuf *key_buf,
-				const char *val, uint32_t len,
+				struct strbuf *val,
 				uint32_t *type_set)
 {
 	const char *key = key_buf->buf;
@@ -95,23 +95,23 @@ static void handle_property(const struct strbuf *key_buf,
 			break;
 		if (!val)
 			die("invalid dump: unsets svn:log");
-		strbuf_reset(&rev_ctx.log);
-		strbuf_add(&rev_ctx.log, val, len);
+		strbuf_swap(&rev_ctx.log, val);
 		break;
 	case sizeof("svn:author"):
 		if (constcmp(key, "svn:author"))
 			break;
-		strbuf_reset(&rev_ctx.author);
-		if (val)
-			strbuf_add(&rev_ctx.author, val, len);
+		if (!val)
+			strbuf_reset(&rev_ctx.author);
+		else
+			strbuf_swap(&rev_ctx.author, val);
 		break;
 	case sizeof("svn:date"):
 		if (constcmp(key, "svn:date"))
 			break;
 		if (!val)
 			die("invalid dump: unsets svn:date");
-		if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
-			warning("invalid timestamp: %s", val);
+		if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL))
+			warning("invalid timestamp: %s", val->buf);
 		break;
 	case sizeof("svn:executable"):
 	case sizeof("svn:special"):
@@ -187,10 +187,10 @@ static void read_props(void)
 			strbuf_swap(&key, &val);
 			continue;
 		case 'D':
-			handle_property(&val, NULL, 0, &type_set);
+			handle_property(&val, NULL, &type_set);
 			continue;
 		case 'V':
-			handle_property(&key, val.buf, len, &type_set);
+			handle_property(&key, &val, &type_set);
 			strbuf_reset(&key);
 			continue;
 		default:
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* [PATCH 4/4] vcs-svn: handle log message with embedded null bytes
  2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
                             ` (2 preceding siblings ...)
  2011-03-25  4:10           ` [PATCH 3/4] vcs-svn: avoid unnecessary copying of log message and author Jonathan Nieder
@ 2011-03-25  4:11           ` Jonathan Nieder
  3 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-25  4:11 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

Use fwrite instead of printf to write the log message to fast-import
so embedded ASCII NUL characters can be preserved.

Currently "git log" doesn't show the embedded NULs but "git cat-file
commit" can.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 t/t9010-svn-fe.sh     |   77 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/fast_export.c |   12 ++++---
 vcs-svn/fast_export.h |    9 +++--
 vcs-svn/repo_tree.c   |    5 ++-
 vcs-svn/repo_tree.h   |    4 +-
 vcs-svn/svndump.c     |    2 +-
 6 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
index 47f1e4f..be35139 100755
--- a/t/t9010-svn-fe.sh
+++ b/t/t9010-svn-fe.sh
@@ -397,6 +397,83 @@ test_expect_success 'null byte in property value' '
 	test_cmp expect.message actual.message
 '
 
+test_expect_success 'null bytes' '
+	# Caveat: svnadmin 1.6.16 (r1073529) truncates at \0 in the
+	# svn:specialQnotreally example.
+	reinit_git &&
+	cat >expect <<-\EOF &&
+	OBJID
+	:100644 100644 OBJID OBJID M	greeting
+	OBJID
+	:000000 100644 OBJID OBJID A	greeting
+	EOF
+	printf "\n%s" "something with a null byte (Q)" >expect.message &&
+	printf "%s\n" "helQo" >expect.hello1 &&
+	printf "%s\n" "link hello" >expect.hello2 &&
+	{
+		properties svn:log "something with a null byte (Q)" &&
+		echo PROPS-END
+	} |
+	q_to_nul >props &&
+	{
+		q_to_nul <<-\EOF &&
+		SVN-fs-dump-format-version: 3
+
+		Revision-number: 1
+		Prop-content-length: 10
+		Content-length: 10
+
+		PROPS-END
+
+		Node-path: greeting
+		Node-kind: file
+		Node-action: add
+		Prop-content-length: 10
+		Text-content-length: 6
+		Content-length: 16
+
+		PROPS-END
+		helQo
+
+		Revision-number: 2
+		EOF
+		echo Prop-content-length: $(wc -c <props) &&
+		echo Content-length: $(wc -c <props) &&
+		echo &&
+		cat props &&
+		q_to_nul <<-\EOF
+
+		Node-path: greeting
+		Node-kind: file
+		Node-action: change
+		Prop-content-length: 43
+		Text-content-length: 11
+		Content-length: 54
+
+		K 21
+		svn:specialQnotreally
+		V 1
+		*
+		PROPS-END
+		link hello
+		EOF
+	} >8bitclean.dump &&
+	test-svn-fe 8bitclean.dump >stream &&
+	git fast-import <stream &&
+	{
+		git rev-list HEAD |
+		git diff-tree --root --stdin |
+		sed "s/$_x40/OBJID/g"
+	} >actual &&
+	git cat-file commit HEAD | nul_to_q | sed -ne "/^\$/,\$ p" >actual.message &&
+	git cat-file blob HEAD^:greeting | nul_to_q >actual.hello1 &&
+	git cat-file blob HEAD:greeting | nul_to_q >actual.hello2 &&
+	test_cmp expect actual &&
+	test_cmp expect.message actual.message &&
+	test_cmp expect.hello1 actual.hello1 &&
+	test_cmp expect.hello2 actual.hello2
+'
+
 test_expect_success 'change file mode and reiterate content' '
 	reinit_git &&
 	cat >expect <<-\EOF &&
diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index a4d4d99..2e5bb67 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -31,12 +31,14 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
 }
 
 static char gitsvnline[MAX_GITSVN_LINE_LEN];
-void fast_export_commit(uint32_t revision, const char *author, char *log,
+void fast_export_commit(uint32_t revision, const char *author,
+			const struct strbuf *log,
 			const char *uuid, const char *url,
 			unsigned long timestamp)
 {
+	static const struct strbuf empty = STRBUF_INIT;
 	if (!log)
-		log = "";
+		log = &empty;
 	if (*uuid && *url) {
 		snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
 				"\n\ngit-svn-id: %s@%"PRIu32" %s\n",
@@ -49,9 +51,9 @@ void fast_export_commit(uint32_t revision, const char *author, char *log,
 		   *author ? author : "nobody",
 		   *author ? author : "nobody",
 		   *uuid ? uuid : "local", timestamp);
-	printf("data %"PRIu32"\n%s%s\n",
-		   (uint32_t) (strlen(log) + strlen(gitsvnline)),
-		   log, gitsvnline);
+	printf("data %"PRIuMAX"\n", log->len + strlen(gitsvnline));
+	fwrite(log->buf, log->len, 1, stdout);
+	printf("%s\n", gitsvnline);
 	if (!first_commit_done) {
 		if (revision > 1)
 			printf("from refs/heads/master^0\n");
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index 05cf97f..5878381 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -1,14 +1,15 @@
 #ifndef FAST_EXPORT_H_
 #define FAST_EXPORT_H_
 
-#include "line_buffer.h"
+struct line_buffer;
+struct strbuf;
 
 void fast_export_delete(uint32_t depth, uint32_t *path);
 void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
 			uint32_t mark);
-void fast_export_commit(uint32_t revision, const char *author, char *log,
-			const char *uuid, const char *url,
-			unsigned long timestamp);
+void fast_export_commit(uint32_t revision, const char *author,
+			const struct strbuf *log, const char *uuid,
+			const char *url, unsigned long timestamp);
 void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len,
 		      struct line_buffer *input);
 
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
index d722e32..8caa015 100644
--- a/vcs-svn/repo_tree.c
+++ b/vcs-svn/repo_tree.c
@@ -278,8 +278,9 @@ void repo_diff(uint32_t r1, uint32_t r2)
 		    repo_commit_root_dir(commit_pointer(r2)));
 }
 
-void repo_commit(uint32_t revision, const char *author, char *log,
-		 const char *uuid, const char *url, unsigned long timestamp)
+void repo_commit(uint32_t revision, const char *author,
+		const struct strbuf *log, const char *uuid, const char *url,
+		unsigned long timestamp)
 {
 	fast_export_commit(revision, author, log, uuid, url, timestamp);
 	dent_commit();
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
index a1b0e87..37bde2e 100644
--- a/vcs-svn/repo_tree.h
+++ b/vcs-svn/repo_tree.h
@@ -1,7 +1,7 @@
 #ifndef REPO_TREE_H_
 #define REPO_TREE_H_
 
-#include "git-compat-util.h"
+struct strbuf;
 
 #define REPO_MODE_DIR 0040000
 #define REPO_MODE_BLB 0100644
@@ -18,7 +18,7 @@ uint32_t repo_read_path(const uint32_t *path);
 uint32_t repo_read_mode(const uint32_t *path);
 void repo_delete(uint32_t *path);
 void repo_commit(uint32_t revision, const char *author,
-		char *log, const char *uuid, const char *url,
+		const struct strbuf *log, const char *uuid, const char *url,
 		long unsigned timestamp);
 void repo_diff(uint32_t r1, uint32_t r2);
 void repo_init(void);
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 88ecef1..eef49ca 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -274,7 +274,7 @@ static void handle_revision(void)
 {
 	if (rev_ctx.revision)
 		repo_commit(rev_ctx.revision, rev_ctx.author.buf,
-			rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
+			&rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
 			rev_ctx.timestamp);
 }
 
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-23  6:03       ` Junio C Hamano
@ 2011-03-26  6:42         ` Jonathan Nieder
  2011-03-26  9:49           ` t0081-line-buffer.sh hangs (Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements) Jonathan Nieder
  0 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-26  6:42 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Ævar Arnfjörð Bjarmason

Junio C Hamano wrote:

> A bit more datapoint.  The first one works just fine, while the second one
> gets stuck:
>
>     $ cd t && make T=t0081-line-buffer.sh test
>     $ cd t && make T=t0081-line-buffer.sh prove
>
> I notice that [sh] spawned from prove is orphaned.
>
> 21643 pts/9    Ss     0:00      \_ bash
> 17149 pts/9    S+     0:00      |   \_ make T=t0081-line-buffer.sh prove
> 17155 pts/9    S+     0:00      |       \_ /bin/sh -c echo "*** prove ***"; GIT_CONFIG=.git/
> 17156 pts/9    S+     0:00      |           \_ /usr/bin/perl /usr/bin/prove --exec /bin/sh t
> 17157 pts/9    Z+     0:00      |               \_ [sh] <defunct>

Hmm, looks like the "kill $!" is not working like it is supposed to.
What is your /bin/sh?  How far does

	make T=t0081-line-buffer.sh prove SHELL_PATH_SH='/bin/sh -x'

get before the lull?

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-23  5:46     ` Junio C Hamano
                         ` (2 preceding siblings ...)
  2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
@ 2011-03-26  6:46       ` Jonathan Nieder
  2011-03-26 18:36         ` Junio C Hamano
  3 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-26  6:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, David Barr, Ramkumar Ramachandra

Hi,

Junio C Hamano wrote:

> Done.

Turns out that introduced a regression (in handling of properties with
embedded NULs; thanks to David for catching and debugging it).  Could
you pull

  git://repo.or.cz/git/jrn.git svn-fe

for a fix and some other improvements in the area?

Sorry for the trouble,
Jonathan Nieder (4):
      vcs-svn: make reading of properties binary-safe
      vcs-svn: remove buffer_read_string
      vcs-svn: avoid unnecessary copying of log message and author
      vcs-svn: handle log message with embedded NUL

 t/t0081-line-buffer.sh  |   35 ++++++----------
 t/t9010-svn-fe.sh       |  104 +++++++++++++++++++++++++++++++++++++++++++++++
 test-line-buffer.c      |    6 ---
 vcs-svn/fast_export.c   |   12 +++--
 vcs-svn/fast_export.h   |    7 ++-
 vcs-svn/line_buffer.c   |    8 ----
 vcs-svn/line_buffer.h   |    4 +-
 vcs-svn/line_buffer.txt |   12 +----
 vcs-svn/repo_tree.c     |    5 +-
 vcs-svn/repo_tree.h     |    4 +-
 vcs-svn/svndump.c       |   42 +++++++++----------
 11 files changed, 156 insertions(+), 83 deletions(-)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* t0081-line-buffer.sh hangs (Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements)
  2011-03-26  6:42         ` Jonathan Nieder
@ 2011-03-26  9:49           ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-26  9:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, Ævar Arnfjörð Bjarmason

Jonathan Nieder wrote:

> What is your /bin/sh?

Looks like

	prove --exec=bash t0081-line-buffer.sh

is enough to reproduce this, while

	bash t0081-line-buffer.sh

works fine.  And it seems to be racy --- with prove, sometimes the
first test hangs, sometimes the 13th, sometimes none.  Sadly that's
all there's time for tonight.

Thanks for catching it.

Puzzled,
Jonathan

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements
  2011-03-26  6:46       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
@ 2011-03-26 18:36         ` Junio C Hamano
  2011-03-28  0:38           ` [PATCH svn-fe] vcs-svn: add missing cast to printf argument Jonathan Nieder
  0 siblings, 1 reply; 72+ messages in thread
From: Junio C Hamano @ 2011-03-26 18:36 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Barr, Ramkumar Ramachandra

Jonathan Nieder <jrnieder@gmail.com> writes:

> Turns out that introduced a regression (in handling of properties with
> embedded NULs; thanks to David for catching and debugging it).  Could
> you pull
>
>   git://repo.or.cz/git/jrn.git svn-fe
>
> for a fix and some other improvements in the area?

Thanks.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* [PATCH svn-fe] vcs-svn: add missing cast to printf argument
  2011-03-26 18:36         ` Junio C Hamano
@ 2011-03-28  0:38           ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-28  0:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, David Barr, Ramkumar Ramachandra

gcc -m32 correctly warns:

 vcs-svn/fast_export.c: In function 'fast_export_commit':
 vcs-svn/fast_export.c:54:2: warning: format '%llu' expects
   argument of type 'long long unsigned int', but argument 2
   has type 'unsigned int' [-Wformat]

Fix it.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Junio C Hamano wrote:

> Thanks.

Any time.  But clearly I can't be trusted. :/

 vcs-svn/fast_export.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index 2e5bb67..99ed70b 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -51,7 +51,8 @@ void fast_export_commit(uint32_t revision, const char *author,
 		   *author ? author : "nobody",
 		   *author ? author : "nobody",
 		   *uuid ? uuid : "local", timestamp);
-	printf("data %"PRIuMAX"\n", log->len + strlen(gitsvnline));
+	printf("data %"PRIuMAX"\n",
+		(uintmax_t) (log->len + strlen(gitsvnline)));
 	fwrite(log->buf, log->len, 1, stdout);
 	printf("%s\n", gitsvnline);
 	if (!first_commit_done) {
-- 
1.7.4.2.660.g270d4b.dirty

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* Re: [PATCH 06/16] vcs-svn: skeleton of an svn delta parser
  2011-03-19  7:20   ` [PATCH 06/16] vcs-svn: skeleton of an svn delta parser David Barr
@ 2011-03-28  3:30     ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-28  3:30 UTC (permalink / raw)
  To: David Barr; +Cc: Git Mailing List, Ramkumar Ramachandra

(culling cc list)
Hi,

David Barr wrote:

> --- a/vcs-svn/line_buffer.c
> +++ b/vcs-svn/line_buffer.c
> @@ -98,10 +98,10 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len)
>  	return ferror(buf->infile) ? NULL : buf->blob_buffer.buf;
>  }
>  
> -void buffer_read_binary(struct line_buffer *buf,
> -				struct strbuf *sb, uint32_t size)
> +off_t buffer_read_binary(struct line_buffer *buf,
> +				struct strbuf *sb, off_t size)
>  {
> -	strbuf_fread(sb, size, buf->infile);
> +	return strbuf_fread(sb, size, buf->infile);
>  }

Apparently this change is from an evil merge.  Yikes.

Anyway, I think the original patch was something like the following.
Would you mind if the parameter and return value go back to being of
type size_t (to avoid a possibly problematic conversion when passing
values to and from strbuf_fread)?

-- 8< --
Date: Sun, 2 Jan 2011 21:37:36 -0600
Subject: vcs-svn: make buffer_read_binary API more convenient

buffer_read_binary is a thin wrapper around fread, but its signature
is wrong:

 - fread can fill an arbitrary in-memory buffer.  buffer_read_binary
   is limited to buffers whose size is representable by a 32-bit
   integer.
 - The result from fread is the number of bytes actually read.
   buffer_read_binary only reports the number of bytes read by
   incrementing sb->len by that amount and returns void.

Fix both: let buffer_read_binary accept a size_t instead of uint32_t
for the number of bytes to try to read and as a convenience return the
number of bytes read.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 vcs-svn/line_buffer.c |    6 +++---
 vcs-svn/line_buffer.h |    2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
index c390387..01fcb84 100644
--- a/vcs-svn/line_buffer.c
+++ b/vcs-svn/line_buffer.c
@@ -91,10 +91,10 @@ char *buffer_read_line(struct line_buffer *buf)
 	return buf->line_buffer;
 }
 
-void buffer_read_binary(struct line_buffer *buf,
-				struct strbuf *sb, uint32_t size)
+size_t buffer_read_binary(struct line_buffer *buf,
+				struct strbuf *sb, size_t size)
 {
-	strbuf_fread(sb, size, buf->infile);
+	return strbuf_fread(sb, size, buf->infile);
 }
 
 off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes)
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
index d0b22dd..8901f21 100644
--- a/vcs-svn/line_buffer.h
+++ b/vcs-svn/line_buffer.h
@@ -23,7 +23,7 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf);
 int buffer_ferror(struct line_buffer *buf);
 char *buffer_read_line(struct line_buffer *buf);
 int buffer_read_char(struct line_buffer *buf);
-void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len);
+size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len);
 /* Returns number of bytes read (not necessarily written). */
 off_t buffer_copy_bytes(struct line_buffer *buf, off_t len);
 off_t buffer_skip_bytes(struct line_buffer *buf, off_t len);
-- 
1.7.4.2.660.g270d4b.dirty

^ permalink raw reply related	[flat|nested] 72+ messages in thread

* Re: vcs-svn: integrate support for text deltas
  2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
                     ` (15 preceding siblings ...)
  2011-03-19  7:20   ` [PATCH 16/16] vcs-svn: implement text-delta handling David Barr
@ 2011-03-28  7:00   ` Jonathan Nieder
  2011-03-28 11:56     ` David Barr
  16 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-28  7:00 UTC (permalink / raw)
  To: David Barr
  Cc: Git Mailing List, Ramkumar Ramachandra, Sverre Rabbelier, Tomas Carnecky

Hi,

David Barr wrote:

> As previously, I have tested against
> the ASF subversion repository to increase confidence in the series.
> Hopefully, this brings us a little closer to having full support for
> version 3 of the subversion dump format in master.

Thanks much for digging this up.  (To think, it's been half a year
since the svndiff0 parser was written!)  I've queued everything except
patch 16, and grabbing patch 16 on top of db/delta-applier +
db/vcs-svn-incremental should be just a formality.

The patches have had over three months to cook already.  Whether or
not a nice UI wrapper is ready[*], I'll be testing and then merging them
to svn-fe in the next couple of weeks.  Sorry to take so long at this.

[*] which will probably mean finally figuring out Tomas's work

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: vcs-svn: integrate support for text deltas
  2011-03-28  7:00   ` vcs-svn: integrate support for text deltas Jonathan Nieder
@ 2011-03-28 11:56     ` David Barr
  0 siblings, 0 replies; 72+ messages in thread
From: David Barr @ 2011-03-28 11:56 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: David Barr, Git Mailing List, Ramkumar Ramachandra,
	Sverre Rabbelier, Tomas Carnecky

Hi,

> Thanks much for digging this up.  (To think, it's been half a year
> since the svndiff0 parser was written!)  I've queued everything except
> patch 16, and grabbing patch 16 on top of db/delta-applier +
> db/vcs-svn-incremental should be just a formality.


Once more, I'll run the tests just in case there's any regression.
(Having applied the aforementioned skipped patch.)

--
David Barr.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 1/4] vcs-svn: make reading of properties binary-safe
  2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
@ 2011-03-28 15:34             ` tb
  2011-03-28 19:41               ` Jonathan Nieder
  0 siblings, 1 reply; 72+ messages in thread
From: tb @ 2011-03-28 15:34 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: David Barr, Git Mailing List, Ramkumar Ramachandra,
	Junio C Hamano, tboegi

Hej,
I'm not sure if this was the original email ...

Commit e7d04ee147dcbe6af1fa1d2147466696e is OK.

But:
failure on t9010 with commit 195b7ca6f229455da61f9f6b
=============
#               test_cmp expect.message actual.message &&
#               test_cmp expect.hello1 actual.hello1 &&
#               test_cmp expect.hello2 actual.hello2
#
ok 14 - change file mode and reiterate content
ok 15 - deltas not supported
ok 16 - property deltas supported
ok 17 - properties on /
ok 18 - deltas for typechange
ok 19 - set up svn repo
ok 20 - t9135/svn.dump
# still have 3 known breakage(s)
# failed 1 among remaining 17 test(s)
1..20
=====================

Some more info:
tb@birne:~/projects/git/git.git> uname -a
Darwin birne.lan 10.7.0 Darwin Kernel Version 10.7.0: Sat Jan 29 15:17:16 PST 2011; root:xnu-1504.9.37~1/RELEASE_I386 i386

tb@birne:~/projects/git/git.git> svn --version
svn, version 1.6.15 (r1038135)
    compiled Jan 29 2011, 15:18:15


tb@birne:~/projects/git/git.git> svnadmin --version
svnadmin, version 1.6.15 (r1038135)
    compiled Jan 29 2011, 15:18:15

  which svn
/usr/bin/svn

I can assist with some more testing
BR
/Torsten


On 03/25/2011 05:07 AM, Jonathan Nieder wrote:
 > A caller to buffer_read_string cannot easily tell the difference
 > between the string "foo" followed by an early end of file and the
 > string "foo\0bar\0baz".  In a half-hearted attempt to catch early EOF,
 > c9d1c8ba (2010-12-28) introduced a safety strlen(val) == len for
 > property keys and values, to at least keep svn-fe from reading
 > uninitialized data when a property list ends early due to EOF.
 >
 > But it is permissible for both keys and values to contain null
 > characters, so in handling revision 59151 of the ASF repository svn-fe
 > encounters a null byte and produces the following message:
 >
 >   fatal: invalid dump: unexpected end of file
 >
 > Fix it by using buffer_read_binary to read to a strbuf (and keep track
 > of the actual length read).  Most consumers of properties still use
 > C-style strings, so in practice we still can't use an author or log
 > message with embedded nuls, but at least this way svn-fe won't error
 > out.
 >
 > Reported-by: David Barr<david.barr@cordelta.com>
 > Signed-off-by: Jonathan Nieder<jrnieder@gmail.com>
 > ---
 >   t/t9010-svn-fe.sh |   27 +++++++++++++++++++++++++++
 >   vcs-svn/svndump.c |   24 ++++++++++--------------
 >   2 files changed, 37 insertions(+), 14 deletions(-)
 >
 > diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
 > index 5a6a4b9..47f1e4f 100755
 > --- a/t/t9010-svn-fe.sh
 > +++ b/t/t9010-svn-fe.sh
 > @@ -370,6 +370,33 @@ test_expect_failure 'change file mode but keep old content' '
 >   	test_cmp hello actual.target
 >   '
 >
 > +test_expect_success 'null byte in property value' '
 > +	reinit_git&&
 > +	echo "commit message">expect.message&&
 > +	{
 > +		properties \
 > +			unimportant "something with a null byte (Q)" \
 > +			svn:log "commit message"&&
 > +		echo PROPS-END
 > +	} |
 > +	q_to_nul>props&&
 > +	{
 > +		cat<<-\EOF&&
 > +		SVN-fs-dump-format-version: 3
 > +
 > +		Revision-number: 1
 > +		EOF
 > +		echo Prop-content-length: $(wc -c<props)&&
 > +		echo Content-length: $(wc -c<props)&&
 > +		echo&&
 > +		cat props
 > +	}>nullprop.dump&&
 > +	test-svn-fe nullprop.dump>stream&&
 > +	git fast-import<stream&&
 > +	git diff-tree --always -s --format=%s HEAD>actual.message&&
 > +	test_cmp expect.message actual.message
 > +'
 > +
 >   test_expect_success 'change file mode and reiterate content' '
 >   	reinit_git&&
 >   	cat>expect<<-\EOF&&
 > diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
 > index ea5b128..c00f031 100644
 > --- a/vcs-svn/svndump.c
 > +++ b/vcs-svn/svndump.c
 > @@ -147,6 +147,7 @@ static void die_short_read(void)
 >   static void read_props(void)
 >   {
 >   	static struct strbuf key = STRBUF_INIT;
 > +	static struct strbuf val = STRBUF_INIT;
 >   	const char *t;
 >   	/*
 >   	 * NEEDSWORK: to support simple mode changes like
 > @@ -163,15 +164,15 @@ static void read_props(void)
 >   	uint32_t type_set = 0;
 >   	while ((t = buffer_read_line(&input))&&  strcmp(t, "PROPS-END")) {
 >   		uint32_t len;
 > -		const char *val;
 >   		const char type = t[0];
 >   		int ch;
 >
 >   		if (!type || t[1] != ' ')
 >   			die("invalid property line: %s\n", t);
 >   		len = atoi(&t[2]);
 > -		val = buffer_read_string(&input, len);
 > -		if (!val || strlen(val) != len)
 > +		strbuf_reset(&val);
 > +		buffer_read_binary(&input,&val, len);
 > +		if (val.len<  len)
 >   			die_short_read();
 >
 >   		/* Discard trailing newline. */
 > @@ -179,22 +180,17 @@ static void read_props(void)
 >   		if (ch == EOF)
 >   			die_short_read();
 >   		if (ch != '\n')
 > -			die("invalid dump: expected newline after %s", val);
 > +			die("invalid dump: expected newline after %s", val.buf);
 >
 >   		switch (type) {
 >   		case 'K':
 > +			strbuf_swap(&key,&val);
 > +			continue;
 >   		case 'D':
 > -			strbuf_reset(&key);
 > -			if (val)
 > -				strbuf_add(&key, val, len);
 > -			if (type == 'K')
 > -				continue;
 > -			assert(type == 'D');
 > -			val = NULL;
 > -			len = 0;
 > -			/* fall through */
 > +			handle_property(&val, NULL, 0,&type_set);
 > +			continue;
 >   		case 'V':
 > -			handle_property(&key, val, len,&type_set);
 > +			handle_property(&key, val.buf, len,&type_set);
 >   			strbuf_reset(&key);
 >   			continue;
 >   		default:


======================










On 03/25/2011 05:07 AM, Jonathan Nieder wrote:
> A caller to buffer_read_string cannot easily tell the difference
> between the string "foo" followed by an early end of file and the
> string "foo\0bar\0baz".  In a half-hearted attempt to catch early EOF,
> c9d1c8ba (2010-12-28) introduced a safety strlen(val) == len for
> property keys and values, to at least keep svn-fe from reading
> uninitialized data when a property list ends early due to EOF.
>
> But it is permissible for both keys and values to contain null
> characters, so in handling revision 59151 of the ASF repository svn-fe
> encounters a null byte and produces the following message:
>
>   fatal: invalid dump: unexpected end of file
>
> Fix it by using buffer_read_binary to read to a strbuf (and keep track
> of the actual length read).  Most consumers of properties still use
> C-style strings, so in practice we still can't use an author or log
> message with embedded nuls, but at least this way svn-fe won't error
> out.
>
> Reported-by: David Barr<david.barr@cordelta.com>
> Signed-off-by: Jonathan Nieder<jrnieder@gmail.com>
> ---
>   t/t9010-svn-fe.sh |   27 +++++++++++++++++++++++++++
>   vcs-svn/svndump.c |   24 ++++++++++--------------
>   2 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
> index 5a6a4b9..47f1e4f 100755
> --- a/t/t9010-svn-fe.sh
> +++ b/t/t9010-svn-fe.sh
> @@ -370,6 +370,33 @@ test_expect_failure 'change file mode but keep old content' '
>   	test_cmp hello actual.target
>   '
>
> +test_expect_success 'null byte in property value' '
> +	reinit_git&&
> +	echo "commit message">expect.message&&
> +	{
> +		properties \
> +			unimportant "something with a null byte (Q)" \
> +			svn:log "commit message"&&
> +		echo PROPS-END
> +	} |
> +	q_to_nul>props&&
> +	{
> +		cat<<-\EOF&&
> +		SVN-fs-dump-format-version: 3
> +
> +		Revision-number: 1
> +		EOF
> +		echo Prop-content-length: $(wc -c<props)&&
> +		echo Content-length: $(wc -c<props)&&
> +		echo&&
> +		cat props
> +	}>nullprop.dump&&
> +	test-svn-fe nullprop.dump>stream&&
> +	git fast-import<stream&&
> +	git diff-tree --always -s --format=%s HEAD>actual.message&&
> +	test_cmp expect.message actual.message
> +'
> +
>   test_expect_success 'change file mode and reiterate content' '
>   	reinit_git&&
>   	cat>expect<<-\EOF&&
> diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
> index ea5b128..c00f031 100644
> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -147,6 +147,7 @@ static void die_short_read(void)
>   static void read_props(void)
>   {
>   	static struct strbuf key = STRBUF_INIT;
> +	static struct strbuf val = STRBUF_INIT;
>   	const char *t;
>   	/*
>   	 * NEEDSWORK: to support simple mode changes like
> @@ -163,15 +164,15 @@ static void read_props(void)
>   	uint32_t type_set = 0;
>   	while ((t = buffer_read_line(&input))&&  strcmp(t, "PROPS-END")) {
>   		uint32_t len;
> -		const char *val;
>   		const char type = t[0];
>   		int ch;
>
>   		if (!type || t[1] != ' ')
>   			die("invalid property line: %s\n", t);
>   		len = atoi(&t[2]);
> -		val = buffer_read_string(&input, len);
> -		if (!val || strlen(val) != len)
> +		strbuf_reset(&val);
> +		buffer_read_binary(&input,&val, len);
> +		if (val.len<  len)
>   			die_short_read();
>
>   		/* Discard trailing newline. */
> @@ -179,22 +180,17 @@ static void read_props(void)
>   		if (ch == EOF)
>   			die_short_read();
>   		if (ch != '\n')
> -			die("invalid dump: expected newline after %s", val);
> +			die("invalid dump: expected newline after %s", val.buf);
>
>   		switch (type) {
>   		case 'K':
> +			strbuf_swap(&key,&val);
> +			continue;
>   		case 'D':
> -			strbuf_reset(&key);
> -			if (val)
> -				strbuf_add(&key, val, len);
> -			if (type == 'K')
> -				continue;
> -			assert(type == 'D');
> -			val = NULL;
> -			len = 0;
> -			/* fall through */
> +			handle_property(&val, NULL, 0,&type_set);
> +			continue;
>   		case 'V':
> -			handle_property(&key, val, len,&type_set);
> +			handle_property(&key, val.buf, len,&type_set);
>   			strbuf_reset(&key);
>   			continue;
>   		default:

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 1/4] vcs-svn: make reading of properties binary-safe
  2011-03-28 15:34             ` tb
@ 2011-03-28 19:41               ` Jonathan Nieder
  2011-03-28 20:30                 ` Torsten Bögershausen
  0 siblings, 1 reply; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-28 19:41 UTC (permalink / raw)
  To: tb; +Cc: David Barr, Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

tb wrote:

> failure on t9010 with commit 195b7ca6f229455da61f9f6b
[...]
> Darwin birne.lan 10.7.0 Darwin Kernel Version 10.7.0: Sat Jan 29
> 15:17:16 PST 2011; root:xnu-1504.9.37~1/RELEASE_I386 i386

Could you try 41e6b91f (vcs-svn: add missing cast to printf argument,
2011-03-27) from

	git://repo.or.cz/git/jrn.git svn-fe

?  I suspect this is fallout from a missing cast in the commit you
pointed to.  Thanks for noticing.

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 1/4] vcs-svn: make reading of properties binary-safe
  2011-03-28 19:41               ` Jonathan Nieder
@ 2011-03-28 20:30                 ` Torsten Bögershausen
  2011-03-28 20:44                   ` Jonathan Nieder
  0 siblings, 1 reply; 72+ messages in thread
From: Torsten Bögershausen @ 2011-03-28 20:30 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: tb, David Barr, Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

On 03/28/2011 09:41 PM, Jonathan Nieder wrote:
> tb wrote:
>
>> failure on t9010 with commit 195b7ca6f229455da61f9f6b
> [...]
>> Darwin birne.lan 10.7.0 Darwin Kernel Version 10.7.0: Sat Jan 29
>> 15:17:16 PST 2011; root:xnu-1504.9.37~1/RELEASE_I386 i386
>
> Could you try 41e6b91f (vcs-svn: add missing cast to printf argument,
> 2011-03-27) from
>
> 	git://repo.or.cz/git/jrn.git svn-fe
>
> ?  I suspect this is fallout from a missing cast in the commit you
> pointed to.  Thanks for noticing.
> --
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
No good news from my side:
Same (?) problem with 41e6b...

not ok - 13 NUL in log message, file content, and property name

/Torsten

Note:
(Commit 41e6b works OK on my 32 bit 386 linux machine.
  There is no svn on that machine, and commit 195b7ca6f22 had 10
failures in TC 9010, if that is any useful information.)

^ permalink raw reply	[flat|nested] 72+ messages in thread

* Re: [PATCH 1/4] vcs-svn: make reading of properties binary-safe
  2011-03-28 20:30                 ` Torsten Bögershausen
@ 2011-03-28 20:44                   ` Jonathan Nieder
  0 siblings, 0 replies; 72+ messages in thread
From: Jonathan Nieder @ 2011-03-28 20:44 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: David Barr, Git Mailing List, Ramkumar Ramachandra, Junio C Hamano

Torsten Bögershausen wrote:

> No good news from my side:
> Same (?) problem with 41e6b...
>
> not ok - 13 NUL in log message, file content, and property name

Alas.  Could you send the output for that test from
"sh t9010-svn-fe.sh -v -i", or from

	GIT_TRACE=1 sh -x t9010-svn-fe.sh -v -i

if it looks more enlightening?

^ permalink raw reply	[flat|nested] 72+ messages in thread

end of thread, other threads:[~2011-03-28 20:44 UTC | newest]

Thread overview: 72+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
2011-03-19  7:50   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 2/9] vcs-svn: avoid using ls command twice David Barr
2011-03-19  8:01   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys David Barr
2011-03-19  8:51   ` Jonathan Nieder
2011-03-21  1:26     ` [PATCH 1/3] " David Barr
2011-03-21  1:26       ` [PATCH 2/3] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-21  1:26       ` [PATCH 3/3] vcs-svn: use switch rather than cascading ifs David Barr
2011-03-21  1:38         ` [PATCHv2] " David Barr
2011-03-19  7:03 ` [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-19  8:57   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 5/9] vcs-svn: factor out usage of string_pool David Barr
2011-03-19  9:08   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 6/9] vcs-svn: drop string_pool David Barr
2011-03-19  7:03 ` [PATCH 7/9] vcs-svn: drop trp.h David Barr
2011-03-19  7:03 ` [PATCH 8/9] vcs-svn: drop obj_pool.h David Barr
2011-03-19  7:03 ` [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter David Barr
2011-03-19  9:10   ` Jonathan Nieder
2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
2011-03-19  7:20   ` [PATCH 01/16] vcs-svn: improve support for reading large files David Barr
2011-03-19  7:20   ` [PATCH 02/16] vcs-svn: make buffer_skip_bytes return length read David Barr
2011-03-19  7:20   ` [PATCH 03/16] vcs-svn: make buffer_copy_bytes " David Barr
2011-03-19  7:20   ` [PATCH 04/16] vcs-svn: improve reporting of input errors David Barr
2011-03-19  7:20   ` [PATCH 05/16] vcs-svn: learn to maintain a sliding view of a file David Barr
2011-03-19  7:20   ` [PATCH 06/16] vcs-svn: skeleton of an svn delta parser David Barr
2011-03-28  3:30     ` Jonathan Nieder
2011-03-19  7:20   ` [PATCH 07/16] vcs-svn: parse svndiff0 window header David Barr
2011-03-19  7:20   ` [PATCH 08/16] vcs-svn: read the preimage when applying deltas David Barr
2011-03-19  7:20   ` [PATCH 09/16] vcs-svn: read inline data from deltas David Barr
2011-03-19  7:20   ` [PATCH 10/16] vcs-svn: read instructions " David Barr
2011-03-19  7:20   ` [PATCH 11/16] vcs-svn: implement copyfrom_data delta instruction David Barr
2011-03-19  7:20   ` [PATCH 12/16] vcs-svn: verify that deltas consume all inline data David Barr
2011-03-19  7:20   ` [PATCH 13/16] vcs-svn: let deltas use data from postimage David Barr
2011-03-19  7:20   ` [PATCH 14/16] vcs-svn: let deltas use data from preimage David Barr
2011-03-19  7:20   ` [PATCH 15/16] vcs-svn: microcleanup in svndiff0 window-reading code David Barr
2011-03-19  7:20   ` [PATCH 16/16] vcs-svn: implement text-delta handling David Barr
2011-03-28  7:00   ` vcs-svn: integrate support for text deltas Jonathan Nieder
2011-03-28 11:56     ` David Barr
2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
2011-03-21 23:49   ` [PATCH 01/11] vcs-svn: use strbuf for revision log David Barr
2011-03-21 23:49   ` [PATCH 02/11] vcs-svn: pass paths through to fast-import David Barr
2011-03-21 23:49   ` [PATCH 03/11] vcs-svn: avoid using ls command twice David Barr
2011-03-21 23:49   ` [PATCH 04/11] vcs-svn: implement perfect hash for node-prop keys David Barr
2011-03-21 23:49   ` [PATCH 05/11] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-21 23:49   ` [PATCH 06/11] vcs-svn: use switch rather than cascading ifs David Barr
2011-03-21 23:49   ` [PATCH 07/11] vcs-svn: factor out usage of string_pool David Barr
2011-03-21 23:49   ` [PATCH 08/11] vcs-svn: drop string_pool David Barr
2011-03-21 23:49   ` [PATCH 09/11] vcs-svn: drop trp.h David Barr
2011-03-21 23:49   ` [PATCH 10/11] vcs-svn: drop obj_pool.h David Barr
2011-03-21 23:50   ` [PATCH 11/11] vcs-svn: use strchr to find RFC822 delimiter David Barr
2011-03-23  0:32   ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
2011-03-23  5:46     ` Junio C Hamano
2011-03-23  6:03       ` Junio C Hamano
2011-03-26  6:42         ` Jonathan Nieder
2011-03-26  9:49           ` t0081-line-buffer.sh hangs (Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements) Jonathan Nieder
2011-03-23  7:11       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements David Barr
2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
2011-03-25  1:12         ` Jonathan Nieder
2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
2011-03-28 15:34             ` tb
2011-03-28 19:41               ` Jonathan Nieder
2011-03-28 20:30                 ` Torsten Bögershausen
2011-03-28 20:44                   ` Jonathan Nieder
2011-03-25  4:09           ` [PATCH 2/4] vcs-svn: remove buffer_read_string Jonathan Nieder
2011-03-25  4:10           ` [PATCH 3/4] vcs-svn: avoid unnecessary copying of log message and author Jonathan Nieder
2011-03-25  4:11           ` [PATCH 4/4] vcs-svn: handle log message with embedded null bytes Jonathan Nieder
2011-03-26  6:46       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
2011-03-26 18:36         ` Junio C Hamano
2011-03-28  0:38           ` [PATCH svn-fe] vcs-svn: add missing cast to printf argument Jonathan Nieder

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.