All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] Import David's SVN exporter
@ 2010-05-23 21:40 Ramkumar Ramachandra
  2010-05-23 21:40 ` [WIP PATCH 1/7] Add skeleton remote helper for SVN Ramkumar Ramachandra
                   ` (6 more replies)
  0 siblings, 7 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

Except for the first patch, this patch series attempts to include
David's exporter [1] into git.git. I've made some efforts to go
through every small detail in the code to clean and document it for
clarity. If the code is more-or-less ready for inclusion (perhaps into
'next'?), the plan is to continue developing the exporter on a
separate repository and pull changes into git.git. I haven't written a
Makefile rule for building this yet.

The first patch includes the skeleton remote helper I sent last time
with some minor changes, and is still a WIP (I'm especially unhappy
with the Makefile rule): I'll post another patch shortly. I've only
included it in this series to illustrate how I'll use David's
exporter.

[1]: http://github.com/artagnon/svn-dump-fast-export

-- Ram

Ramkumar Ramachandra (7):
  Add skeleton remote helper for SVN
  Add cpp macro implementation of treaps
  Add buffer pool library
  Add a memory pool library
  Add API for string-specific memory pool
  Add SVN revision parser and exporter
  Add handler for SVN dump

 Makefile              |    9 ++-
 remote-svn.c          |  201 +++++++++++++++++++++++++++++
 vcs-svn/fast_export.c |   61 +++++++++
 vcs-svn/fast_export.h |   17 +++
 vcs-svn/line_buffer.c |  115 +++++++++++++++++
 vcs-svn/line_buffer.h |   14 ++
 vcs-svn/obj_pool.h    |   61 +++++++++
 vcs-svn/repo_tree.c   |  333 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/repo_tree.h   |   31 +++++
 vcs-svn/string_pool.c |   84 +++++++++++++
 vcs-svn/string_pool.h |   11 ++
 vcs-svn/svnclient.c   |   20 +++
 vcs-svn/svnclient.h   |    7 +
 vcs-svn/svndump.c     |  277 ++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h     |    7 +
 vcs-svn/trp.h         |  221 ++++++++++++++++++++++++++++++++
 16 files changed, 1468 insertions(+), 1 deletions(-)
 create mode 100644 remote-svn.c
 create mode 100644 vcs-svn/fast_export.c
 create mode 100644 vcs-svn/fast_export.h
 create mode 100644 vcs-svn/line_buffer.c
 create mode 100644 vcs-svn/line_buffer.h
 create mode 100644 vcs-svn/obj_pool.h
 create mode 100644 vcs-svn/repo_tree.c
 create mode 100644 vcs-svn/repo_tree.h
 create mode 100644 vcs-svn/string_pool.c
 create mode 100644 vcs-svn/string_pool.h
 create mode 100644 vcs-svn/svnclient.c
 create mode 100644 vcs-svn/svnclient.h
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h
 create mode 100644 vcs-svn/trp.h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [WIP PATCH 1/7] Add skeleton remote helper for SVN
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-23 21:40 ` [PATCH 2/7] Add cpp macro implementation of treaps Ramkumar Ramachandra
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

Create remote-svn.c, which is essentially a stripped-down version of
remote-curl.c to build the SVN remote helper upon. Also add a Makefile
rule to build it.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 Makefile            |    9 ++-
 remote-svn.c        |  201 +++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svnclient.c |   20 +++++
 vcs-svn/svnclient.h |    7 ++
 4 files changed, 236 insertions(+), 1 deletions(-)
 create mode 100644 remote-svn.c
 create mode 100644 vcs-svn/svnclient.c
 create mode 100644 vcs-svn/svnclient.h

diff --git a/Makefile b/Makefile
index 4f7224a..91077a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1658,7 +1658,7 @@ git.o git.spec \
 
 TEST_OBJS := $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS))
 GIT_OBJS := $(LIB_OBJS) $(BUILTIN_OBJS) $(PROGRAM_OBJS) $(TEST_OBJS) \
-	git.o http.o http-walker.o remote-curl.o
+	git.o http.o http-walker.o remote-curl.o remote-svn.o
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
 	xdiff/xmerge.o xdiff/xpatience.o
 OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS)
@@ -1746,6 +1746,9 @@ endif
 %.s: %.c GIT-CFLAGS FORCE
 	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $(EXTRA_CPPFLAGS) $<
 
+remote-svn.o: remote-svn.c GIT-CFLAGS FORCE
+	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) -I./vcs-svn $(EXTRA_CPPFLAGS) $<
+
 ifdef USE_COMPUTED_HEADER_DEPENDENCIES
 # Take advantage of gcc's on-the-fly dependency generation
 # See <http://gcc.gnu.org/gcc-3.0/features.html>.
@@ -1824,6 +1827,10 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o $(GITLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
+git-remote-svn$X: remote-svn.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@	\
+		$(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
 
diff --git a/remote-svn.c b/remote-svn.c
new file mode 100644
index 0000000..69d09c7
--- /dev/null
+++ b/remote-svn.c
@@ -0,0 +1,201 @@
+#include "cache.h"
+#include "strbuf.h"
+#include "remote.h"
+#include "strbuf.h"
+#include "exec_cmd.h"
+
+#include "svnclient.h"
+
+static struct remote *remote;
+static const char *url;
+
+struct options {
+	int verbosity;
+	unsigned long depth;
+	unsigned progress : 1,
+		dry_run : 1;
+};
+static struct options options;
+
+static int set_option(const char *name, const char *value)
+{
+	if (!strcmp(name, "verbosity")) {
+		char *end;
+		int v = strtol(value, &end, 10);
+		if (value == end || *end)
+			return -1;
+		options.verbosity = v;
+		return 0;
+	} else if (!strcmp(name, "progress")) {
+		if (!strcmp(value, "true"))
+			options.progress = 1;
+		else if (!strcmp(value, "false"))
+			options.progress = 0;
+		else
+			return -1;
+		return 0;
+	} else if (!strcmp(name, "dry-run")) {
+		if (!strcmp(value, "true"))
+			options.dry_run = 1;
+		else if (!strcmp(value, "false"))
+			options.dry_run = 0;
+		else
+			return -1;
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
+static int export_handler(int nr_spec, char **specs)
+{
+	int err = 0;
+
+	/* TODO: The real exporting */
+        /* TODO: Write an importer for SVN */
+	
+	return err;
+}
+
+static int import_handler(int nr_spec, char **specs)
+{
+	int err = 0;
+	
+	/* TODO: The real importing */
+	/* TODO: Hook up an SVN exporter's fast-export stream */
+
+	return err;
+}
+
+static void parse_import(struct strbuf *buf)
+{
+	char **specs = NULL;
+	int alloc_spec = 0, nr_spec = 0, i;
+
+	do {
+		if (!prefixcmp(buf->buf, "import ")) {
+			ALLOC_GROW(specs, nr_spec + 1, alloc_spec);
+			specs[nr_spec++] = xstrdup(buf->buf + 5);
+		}
+		else
+			die("remote helper does not support %s", buf->buf);
+
+		strbuf_reset(buf);
+		if (strbuf_getline(buf, stdin, '\n') == EOF)
+			return;
+		if (!*buf->buf)
+			break;
+	} while (1);
+
+	if (import_handler(nr_spec, specs))
+		exit(128); /* error already reported */
+	for (i = 0; i < nr_spec; i++)
+		free(specs[i]);
+	free(specs);
+
+	printf("\n");
+	fflush(stdout);
+}
+
+static void parse_export(struct strbuf *buf)
+{
+	char **specs = NULL;
+	int alloc_spec = 0, nr_spec = 0, i;
+
+	do {
+		if (!prefixcmp(buf->buf, "export ")) {
+			ALLOC_GROW(specs, nr_spec + 1, alloc_spec);
+			specs[nr_spec++] = xstrdup(buf->buf + 5);
+		} else
+			die("remote helper does not support %s", buf->buf);
+
+		strbuf_reset(buf);
+		if (strbuf_getline(buf, stdin, '\n') == EOF)
+			return;
+		if (!*buf->buf)
+			break;
+	} while (1);
+
+	if (export_handler(nr_spec, specs))
+		exit(128); /* error already reported */
+	for (i = 0; i < nr_spec; i++)
+		free(specs[i]);
+	free(specs);
+
+	printf("\n");
+	fflush(stdout);
+}
+
+int main(int argc, const char **argv)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int nongit;
+
+	git_extract_argv0_path(argv[0]);
+	setup_git_directory_gently(&nongit);
+	if (argc < 2) {
+		fprintf(stderr, "Remote needed\n");
+		return 1;
+	}
+
+	options.verbosity = 1;
+	options.progress = !!isatty(2);
+
+	remote = remote_get(argv[1]);
+
+	if (argc > 2) {
+		url = argv[2];
+	} else {
+		url = remote->url[0];
+	}
+
+	open_svn_connection(remote);
+	do {
+		if (strbuf_getline(&buf, stdin, '\n') == EOF)
+			break;
+
+		else if (!strcmp(buf.buf, "list")) {
+			/* TODO: Code to list refs */
+			break;
+
+		} else if (!prefixcmp(buf.buf, "import ")) {
+			if (nongit)
+				die("Import attempted without a local repo");
+
+			parse_import(&buf);
+
+		} else if (!prefixcmp(buf.buf, "export ")) {
+			parse_export(&buf);
+
+		} else if (!prefixcmp(buf.buf, "option ")) {
+			char *name = buf.buf + strlen("option ");
+			char *value = strchr(name, ' ');
+			int result;
+			if (value)
+				*value++ = '\0';
+			else
+				value = "true";
+			result = set_option(name, value);
+			if (!result)
+				printf("ok\n");
+			else if (result < 0)
+				printf("error invalid value\n");
+			else
+				printf("unsupported\n");
+			fflush(stdout);
+
+		} else if (!strcmp(buf.buf, "capabilities")) {
+			printf("option\n");
+			printf("import\n");
+			printf("export\n");
+			printf("\n");
+			fflush(stdout);
+		} else
+			return 1;
+
+		strbuf_reset(&buf);
+	} while (1);
+
+	close_svn_connection(remote);
+	return 0;
+}
diff --git a/vcs-svn/svnclient.c b/vcs-svn/svnclient.c
new file mode 100644
index 0000000..2a3ca44
--- /dev/null
+++ b/vcs-svn/svnclient.c
@@ -0,0 +1,20 @@
+int open_svn_connection(struct remote *remote)
+{
+        /* TODO: Open connection to remote */
+}
+
+int close_svn_connection(struct remote *remote)
+{
+        /* TODO: Close connection */
+}
+
+int FI_svn (unsigned char *sha1, const char *ref)
+{
+        /* TODO: Hook up fast-export stream from SVN exporter */
+}
+
+int FE_svn (unsigned char *sha1, const char *ref)
+{
+        /* TODO: Write a fast-import module for SVN to import from the
+           Git fast-export stream */
+}
diff --git a/vcs-svn/svnclient.h b/vcs-svn/svnclient.h
new file mode 100644
index 0000000..ecd5e8b
--- /dev/null
+++ b/vcs-svn/svnclient.h
@@ -0,0 +1,7 @@
+#ifndef SVNCLIENT_H
+#define SVNCLIENT_H
+
+int open_svn_connection(struct remote *remote);
+int close_svn_connection(struct remote *remote);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/7] Add cpp macro implementation of treaps
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
  2010-05-23 21:40 ` [WIP PATCH 1/7] Add skeleton remote helper for SVN Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-29  7:18   ` Jonathan Nieder
  2010-05-23 21:40 ` [PATCH 3/7] Add buffer pool library Ramkumar Ramachandra
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

The implementation exposes an API to generate type-specific treap
implmentation and various functions to operate on it. Taken directly
from David Michael Barr's svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/trp.h |  221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 221 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/trp.h

diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
new file mode 100644
index 0000000..d40f9d3
--- /dev/null
+++ b/vcs-svn/trp.h
@@ -0,0 +1,221 @@
+/******************************************************************************
+ *
+ * cpp macro implementation of treaps.
+ *
+ * Usage:
+ *
+ *   #include <trp.h>
+ *   trp(...)
+ *   trp_gen(...)
+ *   ...
+ *
+ ******************************************************************************/
+
+#ifndef TRP_H_
+#define	TRP_H_
+
+#include <stdint.h>
+
+/* Node structure. */
+#define	trp_node(a_type)						\
+struct {								\
+    uint32_t trpn_left;							\
+    uint32_t trpn_right;						\
+}
+
+/* Root structure. */
+#define	trp(a_type)							\
+struct {								\
+    uint32_t trp_root;							\
+}
+
+/* Pointer/Offset conversion */
+#define trpn_pointer(a_base, a_offset)					\
+    (a_base##_pointer(a_offset))
+#define trpn_offset(a_base, a_pointer)				        \
+    (a_base##_offset(a_pointer))
+
+/* Left accessors. */
+#define	trp_left_get(a_base, a_type, a_field, a_node)			\
+    trpn_pointer(a_base, (a_node)->a_field.trpn_left)
+#define	trp_left_set(a_base, a_type, a_field, a_node, a_left) do {	\
+    (a_node)->a_field.trpn_left = trpn_offset(a_base, a_left);	        \
+} while (0)
+
+/* Right accessors. */
+#define	trp_right_get(a_base, a_type, a_field, a_node)			\
+    trpn_pointer(a_base, (a_node)->a_field.trpn_right)
+#define	trp_right_set(a_base, a_type, a_field, a_node, a_right) do {	\
+    (a_node)->a_field.trpn_right = trpn_offset(a_base, a_right);        \
+} while (0)
+
+/* Priority accessors. */
+#define	trp_prio_get(a_type, a_field, a_node)				\
+    (2654435761*(uint32_t)(uintptr_t)(a_node))
+
+/* Node initializer. */
+#define	trp_node_new(a_base, a_type, a_field, a_trp, a_node) do {	\
+    trp_left_set(a_base, a_type, a_field, (a_node), NULL);		\
+    trp_right_set(a_base, a_type, a_field, (a_node), NULL);		\
+} while (0)
+
+/* Tree initializer. */
+#define	trp_new(a_type, a_base, a_trp) do {		        	\
+    (a_trp)->trp_root = trpn_offset(a_base, NULL);			\
+} while (0)
+
+/* Internal utility macros. */
+#define	trpn_rotate_left(a_base, a_type, a_field, a_node, r_node) do {	\
+    (r_node) = trp_right_get(a_base, a_type, a_field, (a_node));	\
+    trp_right_set(a_base, a_type, a_field, (a_node),			\
+      trp_left_get(a_base, a_type, a_field, (r_node)));			\
+    trp_left_set(a_base, a_type, a_field, (r_node), (a_node));		\
+} while (0)
+
+#define	trpn_rotate_right(a_base, a_type, a_field, a_node, r_node) do {	\
+    (r_node) = trp_left_get(a_base, a_type, a_field, (a_node));		\
+    trp_left_set(a_base, a_type, a_field, (a_node),			\
+      trp_right_get(a_base, a_type, a_field, (r_node)));		\
+    trp_right_set(a_base, a_type, a_field, (r_node), (a_node));		\
+} while (0)
+
+/*
+ * The trp_gen() macro generates a type-specific treap implementation,
+ * based on the above cpp macros.
+ *
+ * Arguments:
+ *
+ *   a_attr     : Function attribute for generated functions (ex: static).
+ *   a_pre      : Prefix for generated functions (ex: treap_).
+ *   a_t_type   : Type for treap data structure (ex: treap_t).
+ *   a_type     : Type for treap node data structure (ex: treap_node_t).
+ *   a_field    : Name of treap node linkage (ex: treap_link).
+ *   a_base     : Expression for the base pointer from which nodes are offset.
+ *   a_cmp      : Node comparison function name, with the following prototype:
+ *                  int (a_cmp *)(a_type *a_node, a_type *a_other);
+ *                                        ^^^^^^
+ *                                     or a_key
+ *                Interpretation of comparision function return values:
+ *                  -1 : a_node <  a_other
+ *                   0 : a_node == a_other
+ *                   1 : a_node >  a_other
+ *                In all cases, the a_node or a_key macro argument is the first
+ *                argument to the comparison function, which makes it possible
+ *                to write comparison functions that treat the first argument
+ *                specially.
+ *
+ * Assuming the following setup:
+ *
+ *   typedef struct ex_node_s ex_node_t;
+ *   struct ex_node_s {
+ *       trp_node(ex_node_t) ex_link;
+ *   };
+ *   typedef trp(ex_node_t) ex_t;
+ *   static ex_node_t ex_base[MAX_NODES];
+ *   trp_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_base, ex_cmp)
+ *
+ * The following API is generated:
+ *
+ *   static void
+ *   ex_new(ex_t *treap);
+ *       Description: Initialize a treap structure.
+ *       Args:
+ *         treap: Pointer to an uninitialized treap object.
+ *
+ *   static ex_node_t *
+ *   ex_psearch(ex_t *treap, ex_node_t *key);
+ *       Description: Search for node that matches key.  If no match is found,
+ *                    return what would be key's successor/predecessor, were
+ *                    key in treap.
+ *       Args:
+ *         treap: Pointer to a initialized treap object.
+ *         key  : Search key.
+ *       Ret: Node in treap that matches key, or if no match, hypothetical
+ *            node's successor/predecessor (NULL if no successor/predecessor).
+ *
+ *   static void
+ *   ex_insert(ex_t *treap, ex_node_t *node);
+ *       Description: Insert node into treap.
+ *       Args:
+ *         treap: Pointer to a initialized treap object.
+ *         node : Node to be inserted into treap.
+ */
+#define	trp_gen(a_attr, a_pre, a_t_type, a_type, a_field, a_base, a_cmp)\
+a_attr void								\
+a_pre##new(a_t_type *treap) {           				\
+    trp_new(a_type, a_base, treap);			        	\
+}									\
+a_attr a_type *								\
+a_pre##search(a_t_type *treap, a_type *key) {				\
+    a_type *ret;							\
+    int cmp;								\
+    ret = trpn_pointer(a_base, treap->trp_root);			\
+    while (ret != NULL							\
+      && (cmp = (a_cmp)(key, ret)) != 0) {				\
+	if (cmp < 0) {							\
+	    ret = trp_left_get(a_base, a_type, a_field, ret);		\
+	} else {							\
+	    ret = trp_right_get(a_base, a_type, a_field, ret);		\
+	}								\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_pre##psearch(a_t_type *treap, a_type *key) {				\
+    a_type *ret;							\
+    a_type *tnode = trpn_pointer(a_base, treap->trp_root);		\
+    ret = NULL;								\
+    while (tnode != NULL) {						\
+	int cmp = (a_cmp)(key, tnode);					\
+	if (cmp < 0) {							\
+	    tnode = trp_left_get(a_base, a_type, a_field, tnode);	\
+	} else if (cmp > 0) {						\
+	    ret = tnode;						\
+	    tnode = trp_right_get(a_base, a_type, a_field, tnode);	\
+	} else {							\
+	    ret = tnode;						\
+	    break;							\
+	}								\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_pre##insert_recurse(a_type *cur_node, a_type *ins_node) {		\
+    if (cur_node == NULL) {						\
+	return (ins_node);						\
+    } else {								\
+	a_type *ret;							\
+	int cmp = a_cmp(ins_node, cur_node);				\
+	if (cmp < 0) {							\
+	    a_type *left = a_pre##insert_recurse(trp_left_get(a_base,	\
+              a_type, a_field, cur_node), ins_node);			\
+	    trp_left_set(a_base, a_type, a_field, cur_node, left);	\
+	    if (trp_prio_get(a_type, a_field, left) <			\
+	      trp_prio_get(a_type, a_field, cur_node)) {		\
+		trpn_rotate_right(a_base, a_type, a_field, cur_node,	\
+                  ret);							\
+	    } else {							\
+		ret = cur_node;						\
+	    }								\
+	} else {							\
+	    a_type *right = a_pre##insert_recurse(trp_right_get(a_base, \
+	      a_type, a_field, cur_node), ins_node);			\
+	    trp_right_set(a_base, a_type, a_field, cur_node, right);	\
+	    if (trp_prio_get(a_type, a_field, right) <			\
+	      trp_prio_get(a_type, a_field, cur_node)) {		\
+		trpn_rotate_left(a_base, a_type, a_field, cur_node,	\
+                  ret);							\
+	    } else {							\
+		ret = cur_node;						\
+	    }								\
+	}								\
+	return (ret);							\
+    }									\
+}									\
+a_attr void								\
+a_pre##insert(a_t_type *treap, a_type *node) {				\
+    trp_node_new(a_base, a_type, a_field, treap, node);			\
+    treap->trp_root = trpn_offset(a_base, a_pre##insert_recurse(        \
+        trpn_pointer(a_base, treap->trp_root), node));	                \
+}
+#endif                          /* TRP_H_ */
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/7] Add buffer pool library
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
  2010-05-23 21:40 ` [WIP PATCH 1/7] Add skeleton remote helper for SVN Ramkumar Ramachandra
  2010-05-23 21:40 ` [PATCH 2/7] Add cpp macro implementation of treaps Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-24  7:47   ` Peter Baumann
  2010-05-29  8:51   ` Jonathan Nieder
  2010-05-23 21:40 ` [PATCH 4/7] Add a memory " Ramkumar Ramachandra
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

line_buffer creates a couple of static buffers and expose an API for
using them. The idea is to maintain a fixed static memory pool to
avoid constant allocation and de-allocation of memory. Taken directly
from David Michael Barr's svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/line_buffer.c |  115 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/line_buffer.h |   14 ++++++
 2 files changed, 129 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/line_buffer.c
 create mode 100644 vcs-svn/line_buffer.h

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
new file mode 100644
index 0000000..6b47d38
--- /dev/null
+++ b/vcs-svn/line_buffer.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "line_buffer.h"
+
+#define LINE_BUFFER_LEN 10000
+#define COPY_BUFFER_LEN 4096
+
+static char line_buffer[LINE_BUFFER_LEN];
+static char byte_buffer[COPY_BUFFER_LEN];
+static uint32_t line_buffer_len = 0;
+static uint32_t line_len = 0;
+
+char *buffer_read_line(void)
+{
+    char *end;
+    uint32_t n_read;
+
+    if (line_len) {
+        memmove(line_buffer, &line_buffer[line_len],
+                line_buffer_len - line_len);
+        line_buffer_len -= line_len;
+        line_len = 0;
+    }
+
+    end = memchr(line_buffer, '\n', line_buffer_len);
+    while (line_buffer_len < LINE_BUFFER_LEN - 1 &&
+           !feof(stdin) && NULL == end) {
+        n_read =
+            fread(&line_buffer[line_buffer_len], 1,
+                  LINE_BUFFER_LEN - 1 - line_buffer_len,
+                  stdin);
+        end = memchr(&line_buffer[line_buffer_len], '\n', n_read);
+        line_buffer_len += n_read;
+    }
+
+    if (ferror(stdin))
+        return NULL;
+
+    if (end != NULL) {
+        line_len = end - line_buffer;
+        line_buffer[line_len++] = '\0';
+    } else {
+        line_len = line_buffer_len;
+        line_buffer[line_buffer_len] = '\0';
+    }
+
+    if (line_len == 0)
+        return NULL;
+
+    return line_buffer;
+}
+
+char *buffer_read_string(uint32_t len)
+{
+    char *s = malloc(len + 1);
+    uint32_t offset = 0;
+    if (line_buffer_len > line_len) {
+        offset = line_buffer_len - line_len;
+        if (offset > len)
+            offset = len;
+        memcpy(s, &line_buffer[line_len], offset);
+        line_len += offset;
+    }
+    while (offset < len && !feof(stdin)) {
+        offset += fread(&s[offset], 1, len - offset, stdin);
+    }
+    s[offset] = '\0';
+    return s;
+}
+
+void buffer_copy_bytes(uint32_t len)
+{
+    uint32_t in, out;
+    if (line_buffer_len > line_len) {
+        in = line_buffer_len - line_len;
+        if (in > len)
+            in = len;
+        out = 0;
+        while (out < in && !ferror(stdout)) {
+            out +=
+                fwrite(&line_buffer[line_len + out], 1, in - out, stdout);
+        }
+        len -= in;
+        line_len += in;
+    }
+    while (len > 0 && !feof(stdin)) {
+        in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+        in = fread(byte_buffer, 1, in, stdin);
+        len -= in;
+        out = 0;
+        while (out < in && !ferror(stdout)) {
+            out += fwrite(&byte_buffer[out], 1, in - out, stdout);
+        }
+    }
+}
+
+void buffer_skip_bytes(uint32_t len)
+{
+    uint32_t in;
+    if (line_buffer_len > line_len) {
+        in = line_buffer_len - line_len;
+        if (in > len)
+            in = len;
+        line_len += in;
+        len -= in;
+    }
+    while (len > 0 && !feof(stdin)) {
+        in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+        in = fread(byte_buffer, 1, in, stdin);
+        len -= in;
+    }
+}
+
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
new file mode 100644
index 0000000..e546f4d
--- /dev/null
+++ b/vcs-svn/line_buffer.h
@@ -0,0 +1,14 @@
+#ifndef LINE_BUFFER_H_
+#define LINE_BUFFER_H_
+
+#include <stdint.h>
+
+char *buffer_read_line(void);
+
+char *buffer_read_string(uint32_t len);
+
+void buffer_copy_bytes(uint32_t len);
+
+void buffer_skip_bytes(uint32_t len);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/7] Add a memory pool library
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
                   ` (2 preceding siblings ...)
  2010-05-23 21:40 ` [PATCH 3/7] Add buffer pool library Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-29  9:06   ` Jonathan Nieder
  2010-05-23 21:40 ` [PATCH 5/7] Add API for string-specific memory pool Ramkumar Ramachandra
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

Add a memory pool library implemented using cpp macros. The macro can
be used to create a type-specific memory pool API. The memory nodes
themselves are stored in a treap, using macros in trp.h. Taken
directly from David Michael Barr's svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/obj_pool.h |   61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 61 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/obj_pool.h

diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h
new file mode 100644
index 0000000..d8b0842
--- /dev/null
+++ b/vcs-svn/obj_pool.h
@@ -0,0 +1,61 @@
+#ifndef OBJ_POOL_H_
+#define OBJ_POOL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/*
+ * The obj_pool_gen() macro generates a type-specific memory pool
+ * implementation.
+ *
+ * Arguments:
+ *
+ *   pre              : Prefix for generated functions (ex: string_).
+ *   obj_t            : Type for treap data structure (ex: char).
+ *   intial_capacity  : The initial size of the memory pool (ex: 4096).
+ *
+ */
+#define obj_pool_gen(pre, obj_t, initial_capacity)                         \
+static struct {                                                            \
+    uint32_t size;                                                         \
+    uint32_t capacity;                                                     \
+    obj_t *base;                                                           \
+} pre##_pool = { 0, 0, NULL};                                              \
+static uint32_t pre##_alloc(uint32_t count)                                \
+{                                                                          \
+    uint32_t offset;                                                       \
+    while (pre##_pool.size + count > pre##_pool.capacity) {                \
+        if (pre##_pool.capacity) {                                         \
+            pre##_pool.capacity *= 2;                                      \
+        } else {                                                           \
+            pre##_pool.capacity = initial_capacity;                        \
+        }                                                                  \
+        pre##_pool.base =                                                  \
+            realloc(pre##_pool.base, pre##_pool.capacity * sizeof(obj_t)); \
+    }                                                                      \
+    offset = pre##_pool.size;                                              \
+    pre##_pool.size += count;                                              \
+    return offset;                                                         \
+}                                                                          \
+static void pre##_free(uint32_t count)                                     \
+{                                                                          \
+    pre##_pool.size -= count;                                              \
+}                                                                          \
+static uint32_t pre##_offset(obj_t *obj)                                   \
+{                                                                          \
+    return obj == NULL ? ~0 : obj - pre##_pool.base;                       \
+}                                                                          \
+static obj_t *pre##_pointer(uint32_t offset)                               \
+{                                                                          \
+    return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset];    \
+}                                                                          \
+static void pre##_reset(void)                                              \
+{                                                                          \
+    if (pre##_pool.base)                                                   \
+        free(pre##_pool.base);                                             \
+    pre##_pool.base = NULL;                                                \
+    pre##_pool.size = 0;                                                   \
+    pre##_pool.capacity = 0;                                               \
+}                                                                          \
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 5/7] Add API for string-specific memory pool
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
                   ` (3 preceding siblings ...)
  2010-05-23 21:40 ` [PATCH 4/7] Add a memory " Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-29 11:38   ` Jonathan Nieder
  2010-05-23 21:40 ` [PATCH 6/7] Add SVN revision parser and exporter Ramkumar Ramachandra
  2010-05-23 21:40 ` [PATCH 7/7] Add handler for SVN dump Ramkumar Ramachandra
  6 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

string_pool uses the macros in the memory pool library to create a
memory pool for strings and expose an API for handling the
strings. Taken directly from David Michael Barr's svn-dump-fast-export
repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/string_pool.c |   84 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/string_pool.h |   11 ++++++
 2 files changed, 95 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/string_pool.c
 create mode 100644 vcs-svn/string_pool.h

diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
new file mode 100644
index 0000000..4624134
--- /dev/null
+++ b/vcs-svn/string_pool.c
@@ -0,0 +1,84 @@
+#include <string.h>
+
+#include "trp.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+typedef struct node_s node_t;
+typedef trp(node_t) tree_t;
+static tree_t tree = { ~0 };
+
+struct node_s {
+    uint32_t offset;
+    trp_node(node_t) children;
+};
+
+/* Create two memory pools: one for node_t, and another for strings */
+obj_pool_gen(node, node_t, 4096);
+obj_pool_gen(string, char, 4096);
+
+static char *node_value(node_t *node)
+{
+    return node ? string_pointer(node->offset) : NULL;
+}
+
+static int node_value_cmp(node_t *a, node_t *b)
+{
+    return strcmp(node_value(a), node_value(b));
+}
+
+static int node_indentity_cmp(node_t *a, node_t *b)
+{
+    int r = node_value_cmp(a, b);
+    return r ? r : (((uintptr_t) a) > ((uintptr_t) b))
+        - (((uintptr_t) a) < ((uintptr_t) b));
+}
+
+/* Build a Treap from the node_s structure (a trp_node w/ offset) */
+trp_gen(static, tree_, tree_t, node_t, children, node,
+        node_indentity_cmp);
+
+static char *pool_fetch(uint32_t entry)
+{
+    return node_value(node_pointer(entry));
+}
+
+uint32_t pool_intern(char *key)
+{
+    node_t *match = NULL;
+    uint32_t key_len = strlen(key) + 1;
+    node_t *node = node_pointer(node_alloc(1));
+    node->offset = string_alloc(key_len);
+    strcpy(node_value(node), key);
+    match = tree_psearch(&tree, node);
+    if (!match || node_value_cmp(node, match)) {
+        tree_insert(&tree, node);
+    } else {
+        node_free(1);
+        string_free(key_len);
+        node = match;
+    }
+    return node_offset(node);
+}
+
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
+{
+    char *token = strtok_r(str, delim, saveptr);
+    return token ? pool_intern(token) : ~0;
+}
+
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
+{
+    uint32_t i;
+    for (i = 0; i < len; i++) {
+        fputs(pool_fetch(seq[i]), stream);
+        if (i < len - 1)
+            fputc(delim, stream);
+    }
+}
+
+void pool_reset(void)
+{
+    node_reset();
+    string_reset();
+}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
new file mode 100644
index 0000000..fb9e6b8
--- /dev/null
+++ b/vcs-svn/string_pool.h
@@ -0,0 +1,11 @@
+#ifndef STRING_POOL_H_
+#define	STRING_POOL_H_
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
+void pool_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 6/7] Add SVN revision parser and exporter
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
                   ` (4 preceding siblings ...)
  2010-05-23 21:40 ` [PATCH 5/7] Add API for string-specific memory pool Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-29 14:06   ` Jonathan Nieder
  2010-05-23 21:40 ` [PATCH 7/7] Add handler for SVN dump Ramkumar Ramachandra
  6 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

repo_tree parses SVN revisions to build a Git objects, and use
fast_export to emit them so they can be imported into the Git object
store via a fast-import. Taken directly from David Michael Barr's
svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/fast_export.c |   61 +++++++++
 vcs-svn/fast_export.h |   17 +++
 vcs-svn/repo_tree.c   |  333 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/repo_tree.h   |   31 +++++
 4 files changed, 442 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/fast_export.c
 create mode 100644 vcs-svn/fast_export.h
 create mode 100644 vcs-svn/repo_tree.c
 create mode 100644 vcs-svn/repo_tree.h

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
new file mode 100644
index 0000000..f4d9ab7
--- /dev/null
+++ b/vcs-svn/fast_export.c
@@ -0,0 +1,61 @@
+#include <string.h>
+
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "repo_tree.h"
+#include "string_pool.h"
+
+#define MAX_GITSVN_LINE_LEN 4096
+
+void fast_export_delete(uint32_t depth, uint32_t *path)
+{
+    putchar('D');
+    putchar(' ');
+    pool_print_seq(depth, path, '/', stdout);
+    putchar('\n');
+}
+
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                        uint32_t mark)
+{
+    printf("M %06o :%d ", mode, mark);
+    pool_print_seq(depth, path, '/', stdout);
+    putchar('\n');
+}
+
+static char gitsvnline[MAX_GITSVN_LINE_LEN];
+void fast_export_commit(uint32_t revision, char *author, char *log,
+                        char *uuid, char *url, time_t timestamp)
+{
+    if (!author)
+        author = "nobody";
+    if (!log)
+        log = "";
+    if (uuid && url) {
+        snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n",
+             url, revision, uuid);
+    } else {
+        *gitsvnline = '\0';
+    }
+    printf("commit refs/heads/master\nmark :%d\n", revision);
+    printf("committer %s <%s@%s> %ld +0000\n",
+         author, author, uuid ? uuid : "local", timestamp);
+    printf("data %ld\n%s%s\n",
+           strlen(log) + strlen(gitsvnline), log, gitsvnline);
+    repo_diff(revision - 1, revision);
+    fputc('\n', stdout);
+
+    printf("progress Imported commit %d.\n\n", revision);
+}
+
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len)
+{
+    if (mode == REPO_MODE_LNK) {
+        /* svn symlink blobs start with "link " */
+        buffer_skip_bytes(5);
+        len -= 5;
+    }
+    printf("blob\nmark :%d\ndata %d\n", mark, len);
+    buffer_copy_bytes(len);
+    fputc('\n', stdout);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
new file mode 100644
index 0000000..e84144e
--- /dev/null
+++ b/vcs-svn/fast_export.h
@@ -0,0 +1,17 @@
+#ifndef FAST_EXPORT_H_
+#define FAST_EXPORT_H_
+
+#include <stdint.h>
+#include <time.h>
+
+void fast_export_delete(uint32_t depth, uint32_t *path);
+
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                        uint32_t mark);
+
+void fast_export_commit(uint32_t revision, char *author, char *log,
+                        char *uuid, char *url, time_t timestamp);
+
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len);
+
+#endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
new file mode 100644
index 0000000..7c4a70f
--- /dev/null
+++ b/vcs-svn/repo_tree.c
@@ -0,0 +1,333 @@
+#include <string.h>
+
+#include "string_pool.h"
+#include "repo_tree.h"
+#include "obj_pool.h"
+#include "fast_export.h"
+
+typedef struct repo_dirent_s repo_dirent_t;
+
+struct repo_dirent_s {
+    uint32_t name_offset;
+    uint32_t mode;
+    uint32_t content_offset;
+};
+
+typedef struct repo_dir_s repo_dir_t;
+
+struct repo_dir_s {
+    uint32_t size;
+    uint32_t first_offset;
+};
+
+typedef struct repo_commit_s repo_commit_t;
+
+struct repo_commit_s {
+    uint32_t mark;
+    uint32_t root_dir_offset;
+};
+
+/* Generate memory pools for commit, dir and dirent */
+obj_pool_gen(commit, repo_commit_t, 4096);
+obj_pool_gen(dir, repo_dir_t, 4096);
+obj_pool_gen(dirent, repo_dirent_t, 4096);
+
+static uint32_t num_dirs_saved = 0;
+static uint32_t num_dirents_saved = 0;
+static uint32_t active_commit = -1;
+
+static repo_dir_t *repo_commit_root_dir(repo_commit_t *commit)
+{
+    return dir_pointer(commit->root_dir_offset);
+}
+
+static repo_dirent_t *repo_first_dirent(repo_dir_t *dir)
+{
+    return dirent_pointer(dir->first_offset);
+}
+
+static int repo_dirent_name_cmp(const void *a, const void *b)
+{
+    return (((repo_dirent_t *) a)->name_offset
+            > ((repo_dirent_t *) b)->name_offset) -
+        (((repo_dirent_t *) a)->name_offset
+         < ((repo_dirent_t *) b)->name_offset);
+}
+
+static repo_dirent_t *repo_dirent_by_name(repo_dir_t *dir,
+                                          uint32_t name_offset)
+{
+    repo_dirent_t key;
+    if (dir == NULL || dir->size == 0)
+        return NULL;
+    key.name_offset = name_offset;
+    return bsearch(&key, repo_first_dirent(dir), dir->size,
+                   sizeof(repo_dirent_t), repo_dirent_name_cmp);
+}
+
+static int repo_dirent_is_dir(repo_dirent_t *dirent)
+{
+    return dirent != NULL && dirent->mode == REPO_MODE_DIR;
+}
+
+static repo_dir_t *repo_dir_from_dirent(repo_dirent_t *dirent)
+{
+    if (!repo_dirent_is_dir(dirent))
+        return NULL;
+    return dir_pointer(dirent->content_offset);
+}
+
+static uint32_t dir_with_dirents_alloc(uint32_t size)
+{
+    uint32_t offset = dir_alloc(1);
+    dir_pointer(offset)->size = size;
+    dir_pointer(offset)->first_offset = dirent_alloc(size);
+    return offset;
+}
+
+static repo_dir_t *repo_clone_dir(repo_dir_t *orig_dir, uint32_t padding)
+{
+    uint32_t orig_o, new_o, dirent_o;
+    orig_o = dir_offset(orig_dir);
+    if (orig_o < num_dirs_saved) {
+        new_o = dir_with_dirents_alloc(orig_dir->size + padding);
+        orig_dir = dir_pointer(orig_o);
+        dirent_o = dir_pointer(new_o)->first_offset;
+    } else {
+        if (padding == 0)
+            return orig_dir;
+        new_o = orig_o;
+        dirent_o = dirent_alloc(orig_dir->size + padding);
+    }
+    memcpy(dirent_pointer(dirent_o), repo_first_dirent(orig_dir),
+           orig_dir->size * sizeof(repo_dirent_t));
+    dir_pointer(new_o)->size = orig_dir->size + padding;
+    dir_pointer(new_o)->first_offset = dirent_o;
+    return dir_pointer(new_o);
+}
+
+static char repo_path_buffer[REPO_MAX_PATH_LEN];
+static repo_dirent_t *repo_read_dirent(uint32_t revision, char *path)
+{
+    char *ctx = NULL;
+    uint32_t name = 0;
+    repo_dir_t *dir = NULL;
+    repo_dirent_t *dirent = NULL;
+    dir = repo_commit_root_dir(commit_pointer(revision));
+    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
+    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
+    path = repo_path_buffer;
+    for (name = pool_tok_r(path, "/", &ctx);
+         ~name; name = pool_tok_r(NULL, "/", &ctx)) {
+        dirent = repo_dirent_by_name(dir, name);
+        if (dirent == NULL) {
+            return NULL;
+        } else if (repo_dirent_is_dir(dirent)) {
+            dir = repo_dir_from_dirent(dirent);
+        } else {
+            break;
+        }
+    }
+    return dirent;
+}
+
+static void
+repo_write_dirent(char *path, uint32_t mode, uint32_t content_offset,
+                  uint32_t del)
+{
+    char *ctx;
+    uint32_t name, revision, dirent_o, dir_o, parent_dir_o;
+    repo_dir_t *dir;
+    repo_dirent_t *dirent = NULL;
+    revision = active_commit;
+    dir = repo_commit_root_dir(commit_pointer(revision));
+    dir = repo_clone_dir(dir, 0);
+    commit_pointer(revision)->root_dir_offset = dir_offset(dir);
+    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
+    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
+    path = repo_path_buffer;
+    for (name = pool_tok_r(path, "/", &ctx); ~name;
+         name = pool_tok_r(NULL, "/", &ctx)) {
+        parent_dir_o = dir_offset(dir);
+        dirent = repo_dirent_by_name(dir, name);
+        if (dirent == NULL) {
+            dir = repo_clone_dir(dir, 1);
+            dirent = &repo_first_dirent(dir)[dir->size - 1];
+            dirent->name_offset = name;
+            dirent->mode = REPO_MODE_DIR;
+            qsort(repo_first_dirent(dir), dir->size,
+                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
+            dirent = repo_dirent_by_name(dir, name);
+            dir_o = dir_with_dirents_alloc(0);
+            dirent->content_offset = dir_o;
+            dir = dir_pointer(dir_o);
+        } else if ((dir = repo_dir_from_dirent(dirent))) {
+            dirent_o = dirent_offset(dirent);
+            dir = repo_clone_dir(dir, 0);
+            if (dirent_o != ~0)
+                dirent_pointer(dirent_o)->content_offset = dir_offset(dir);
+        } else {
+            dirent->mode = REPO_MODE_DIR;
+            dirent_o = dirent_offset(dirent);
+            dir_o = dir_with_dirents_alloc(0);
+            dirent = dirent_pointer(dirent_o);
+            dir = dir_pointer(dir_o);
+            dirent->content_offset = dir_o;
+        }
+    }
+    if (dirent) {
+        dirent->mode = mode;
+        dirent->content_offset = content_offset;
+        if (del && ~parent_dir_o) {
+            dirent->name_offset = ~0;
+            dir = dir_pointer(parent_dir_o);
+            qsort(repo_first_dirent(dir), dir->size,
+                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
+            dir->size--;
+        }
+    }
+}
+
+uint32_t repo_copy(uint32_t revision, char *src, char *dst)
+{
+    uint32_t mode = 0, content_offset = 0;
+    repo_dirent_t *src_dirent;
+    src_dirent = repo_read_dirent(revision, src);
+    if (src_dirent != NULL) {
+        mode = src_dirent->mode;
+        content_offset = src_dirent->content_offset;
+        repo_write_dirent(dst, mode, content_offset, 0);
+    }
+    return mode;
+}
+
+void repo_add(char *path, uint32_t mode, uint32_t blob_mark)
+{
+    repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+uint32_t repo_replace(char *path, uint32_t blob_mark)
+{
+    uint32_t mode = 0;
+    repo_dirent_t *src_dirent;
+    src_dirent = repo_read_dirent(active_commit, path);
+    if (src_dirent != NULL) {
+        mode = src_dirent->mode;
+        repo_write_dirent(path, mode, blob_mark, 0);
+    }
+    return mode;
+}
+
+void repo_modify(char *path, uint32_t mode, uint32_t blob_mark)
+{
+    repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+void repo_delete(char *path)
+{
+    repo_write_dirent(path, 0, 0, 1);
+}
+
+static void
+repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir);
+
+static void
+repo_git_add(uint32_t depth, uint32_t *path, repo_dirent_t *dirent)
+{
+    if (repo_dirent_is_dir(dirent)) {
+        repo_git_add_r(depth, path, repo_dir_from_dirent(dirent));
+    } else {
+        fast_export_modify(depth, path, dirent->mode, dirent->content_offset);
+    }
+}
+
+static void
+repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir)
+{
+    uint32_t o;
+    repo_dirent_t *de;
+    de = repo_first_dirent(dir);
+    for (o = 0; o < dir->size; o++) {
+        path[depth] = de[o].name_offset;
+        repo_git_add(depth + 1, path, &de[o]);
+    }
+}
+
+static void
+repo_diff_r(uint32_t depth, uint32_t *path, repo_dir_t *dir1,
+            repo_dir_t *dir2)
+{
+    repo_dirent_t *de1, *de2, *max_de1, *max_de2;
+    de1 = repo_first_dirent(dir1);
+    de2 = repo_first_dirent(dir2);
+    max_de1 = &de1[dir1->size];
+    max_de2 = &de2[dir2->size];
+
+    while (de1 < max_de1 && de2 < max_de2) {
+        if (de1->name_offset < de2->name_offset) {
+            path[depth] = (de1++)->name_offset;
+            fast_export_delete(depth + 1, path);
+        } else if (de1->name_offset == de2->name_offset) {
+            path[depth] = de1->name_offset;
+            if (de1->content_offset != de2->content_offset) {
+                if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) {
+                    repo_diff_r(depth + 1, path,
+                                repo_dir_from_dirent(de1),
+                                repo_dir_from_dirent(de2));
+                } else {
+                    if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) {
+                        fast_export_delete(depth + 1, path);
+                    }
+                    repo_git_add(depth + 1, path, de2);
+                }
+            }
+            de1++;
+            de2++;
+        } else {
+            path[depth] = de2->name_offset;
+            repo_git_add(depth + 1, path, de2++);
+        }
+    }
+    while (de1 < max_de1) {
+        path[depth] = (de1++)->name_offset;
+        fast_export_delete(depth + 1, path);
+    }
+    while (de2 < max_de2) {
+        path[depth] = de2->name_offset;
+        repo_git_add(depth + 1, path, de2++);
+    }
+}
+
+static uint32_t path_stack[1000];
+void repo_diff(uint32_t r1, uint32_t r2)
+{
+    repo_diff_r(0,
+                path_stack,
+                repo_commit_root_dir(commit_pointer(r1)),
+                repo_commit_root_dir(commit_pointer(r2)));
+}
+
+void repo_commit(uint32_t revision, char *author, char *log, char *uuid,
+                 char *url, time_t timestamp)
+{
+    if (revision == 0) {
+        active_commit = commit_alloc(1);
+        commit_pointer(active_commit)->root_dir_offset =
+            dir_with_dirents_alloc(0);
+    } else {
+        fast_export_commit(revision, author, log, uuid, url, timestamp);
+    }
+    num_dirs_saved = dir_pool.size;
+    num_dirents_saved = dirent_pool.size;
+    active_commit = commit_alloc(1);
+    commit_pointer(active_commit)->root_dir_offset =
+        commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+void repo_reset(void)
+{
+    pool_reset();
+    commit_reset();
+    dir_reset();
+    dirent_reset();
+}
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
new file mode 100644
index 0000000..2d645dc
--- /dev/null
+++ b/vcs-svn/repo_tree.h
@@ -0,0 +1,31 @@
+#ifndef REPO_TREE_H_
+#define REPO_TREE_H_
+
+#include <stdint.h>
+#include <time.h>
+
+#define REPO_MODE_DIR 0040000
+#define REPO_MODE_BLB 0100644
+#define REPO_MODE_EXE 0100755
+#define REPO_MODE_LNK 0120000
+
+#define REPO_MAX_PATH_LEN 4096
+
+uint32_t repo_copy(uint32_t revision, char *src, char *dst);
+
+void repo_add(char *path, uint32_t mode, uint32_t blob_mark);
+
+uint32_t repo_replace(char *path, uint32_t blob_mark);
+
+void repo_modify(char *path, uint32_t mode, uint32_t blob_mark);
+
+void repo_delete(char *path);
+
+void repo_commit(uint32_t revision, char *author, char *log, char *uuid,
+                 char *url, time_t timestamp);
+
+void repo_diff(uint32_t r1, uint32_t r2);
+
+void repo_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 7/7] Add handler for SVN dump
  2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
                   ` (5 preceding siblings ...)
  2010-05-23 21:40 ` [PATCH 6/7] Add SVN revision parser and exporter Ramkumar Ramachandra
@ 2010-05-23 21:40 ` Ramkumar Ramachandra
  2010-05-30  8:59   ` Jonathan Nieder
  6 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-23 21:40 UTC (permalink / raw)
  To: Git Mailing List; +Cc: David Michael Barr

Expose an API to parse an SVN dump created with `svnadmin dump` and
emit a fast-import stream using fast_export and repo_tree. This is a
temporary workaround: it will be replaced with a client that
communicates live with a remote SVN server in future. Taken directly
from David Michael Barr's svn-dump-fast-export repository.

Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/svndump.c |  277 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h |    7 ++
 2 files changed, 284 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
new file mode 100644
index 0000000..dbe8d77
--- /dev/null
+++ b/vcs-svn/svndump.c
@@ -0,0 +1,277 @@
+/*
+ * Parse and rearrange a svnadmin dump.
+ * Create the dump with:
+ * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "repo_tree.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+
+#define NODEACT_REPLACE 4
+#define NODEACT_DELETE 3
+#define NODEACT_ADD 2
+#define NODEACT_CHANGE 1
+#define NODEACT_UNKNOWN 0
+
+#define DUMP_CTX 0
+#define REV_CTX  1
+#define NODE_CTX 2
+
+#define LENGTH_UNKNOWN (~0)
+
+#define BLOB_MARK_OFFSET 1000000000
+
+static struct {
+    uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
+    char *src, *dst;
+} node_ctx;
+
+static struct {
+    uint32_t revision;
+    time_t timestamp;
+    char *descr, *author, *date;
+} rev_ctx;
+
+static struct {
+    char *uuid, *url;
+} dump_ctx;
+
+static void reset_node_ctx(char *fname)
+{
+    node_ctx.type = 0;
+    node_ctx.action = NODEACT_UNKNOWN;
+    node_ctx.propLength = LENGTH_UNKNOWN;
+    node_ctx.textLength = LENGTH_UNKNOWN;
+    if (node_ctx.src)
+        free(node_ctx.src);
+    node_ctx.src = NULL;
+    node_ctx.srcRev = 0;
+    node_ctx.srcMode = 0;
+    if (node_ctx.dst)
+        free(node_ctx.dst);
+    node_ctx.dst = fname;
+    node_ctx.mark = 0;
+}
+
+static void reset_rev_ctx(uint32_t revision)
+{
+    rev_ctx.revision = revision;
+    rev_ctx.timestamp = 0;
+    if (rev_ctx.descr)
+        free(rev_ctx.descr);
+    rev_ctx.descr = NULL;
+    if (rev_ctx.author)
+        free(rev_ctx.author);
+    rev_ctx.author = NULL;
+    if (rev_ctx.date)
+        free(rev_ctx.date);
+    rev_ctx.date = NULL;
+}
+
+static void reset_dump_ctx(char *url)
+{
+    if (dump_ctx.url)
+        free(dump_ctx.url);
+    dump_ctx.url = url;
+    if (dump_ctx.uuid)
+        free(dump_ctx.uuid);
+    dump_ctx.uuid = NULL;
+}
+
+static uint32_t next_blob_mark(void)
+{
+    static uint32_t mark = BLOB_MARK_OFFSET;
+    return mark++;
+}
+
+static void read_props(void)
+{
+    struct tm tm;
+    uint32_t len;
+    char *key = NULL;
+    char *val = NULL;
+    char *t;
+    while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) {
+        if (!strncmp(t, "K ", 2)) {
+            len = atoi(&t[2]);
+            key = buffer_read_string(len);
+            buffer_read_line();
+        } else if (!strncmp(t, "V ", 2)) {
+            len = atoi(&t[2]);
+            val = buffer_read_string(len);
+            if (!strcmp(key, "svn:log")) {
+                if (rev_ctx.descr)
+                    free(rev_ctx.descr);
+                rev_ctx.descr = val;
+            } else if (!strcmp(key, "svn:author")) {
+                if (rev_ctx.author)
+                    free(rev_ctx.author);
+                rev_ctx.author = val;
+            } else if (!strcmp(key, "svn:date")) {
+                if (rev_ctx.date)
+                    free(rev_ctx.date);
+                rev_ctx.date = val;
+                strptime(rev_ctx.date, "%FT%T", &tm);
+                timezone = 0;
+                tm.tm_isdst = 0;
+                rev_ctx.timestamp = mktime(&tm);
+            } else if (!strcmp(key, "svn:executable")) {
+                if (node_ctx.type == REPO_MODE_BLB) {
+                    node_ctx.type = REPO_MODE_EXE;
+                }
+                free(val);
+            } else if (!strcmp(key, "svn:special")) {
+                if (node_ctx.type == REPO_MODE_BLB) {
+                    node_ctx.type = REPO_MODE_LNK;
+                }
+                free(val);
+            } else {
+                free(val);
+            }
+            if (key)
+                free(key);
+            key = NULL;
+            buffer_read_line();
+        }
+    }
+}
+
+static void handle_node(void)
+{
+    if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) {
+        read_props();
+    }
+
+    if (node_ctx.src && node_ctx.srcRev) {
+        node_ctx.srcMode =
+            repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+    }
+
+    if (node_ctx.textLength != LENGTH_UNKNOWN &&
+        node_ctx.type != REPO_MODE_DIR) {
+        node_ctx.mark = next_blob_mark();
+    }
+
+    if (node_ctx.action == NODEACT_DELETE) {
+        repo_delete(node_ctx.dst);
+    } else if (node_ctx.action == NODEACT_CHANGE || 
+               node_ctx.action == NODEACT_REPLACE) {
+        if (node_ctx.propLength != LENGTH_UNKNOWN &&
+            node_ctx.textLength != LENGTH_UNKNOWN) {
+            repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+        } else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+            node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+        }
+    } else if (node_ctx.action == NODEACT_ADD) {
+        if (node_ctx.src && node_ctx.srcRev &&
+            node_ctx.propLength == LENGTH_UNKNOWN &&
+            node_ctx.textLength != LENGTH_UNKNOWN) {
+            node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+        } else if (node_ctx.type == REPO_MODE_DIR ||
+                   node_ctx.textLength != LENGTH_UNKNOWN){
+            repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
+        }
+    }
+
+    if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) {
+        node_ctx.type = node_ctx.srcMode;
+    }
+
+    if (node_ctx.mark) {
+        fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
+    } else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+        buffer_skip_bytes(node_ctx.textLength);
+    }
+}
+
+static void handle_revision(void)
+{
+    repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.descr, dump_ctx.uuid,
+                dump_ctx.url, rev_ctx.timestamp);
+}
+
+static void svndump_read(char *url)
+{
+    char *val;
+    char *t;
+    uint32_t active_ctx = DUMP_CTX; 
+    uint32_t len;
+
+    reset_dump_ctx(url);
+    while ((t = buffer_read_line())) {
+        val = strstr(t, ": ");
+        if (!val) continue;
+        *val++ = '\0';
+        *val++ = '\0';
+
+        if(!strcmp(t, "UUID")) {
+            dump_ctx.uuid = strdup(val);
+        } else if (!strcmp(t, "Revision-number")) {
+            if (active_ctx != DUMP_CTX) handle_revision();
+            active_ctx = REV_CTX;
+            reset_rev_ctx(atoi(val));
+        } else if (!strcmp(t, "Node-path")) {
+            active_ctx = NODE_CTX;
+            reset_node_ctx(strdup(val));
+        } else if (!strcmp(t, "Node-kind")) {
+            if (!strcmp(val, "dir")) {
+                node_ctx.type = REPO_MODE_DIR;
+            } else if (!strcmp(val, "file")) {
+                node_ctx.type = REPO_MODE_BLB;
+            } else {
+                fprintf(stderr, "Unknown node-kind: %s\n", val);
+            }
+        } else if (!strcmp(t, "Node-action")) {
+            if (!strcmp(val, "delete")) {
+                node_ctx.action = NODEACT_DELETE;
+            } else if (!strcmp(val, "add")) {
+                node_ctx.action = NODEACT_ADD;
+            } else if (!strcmp(val, "change")) {
+                node_ctx.action = NODEACT_CHANGE;
+            } else if (!strcmp(val, "replace")) {
+                node_ctx.action = NODEACT_REPLACE;
+            } else {
+                fprintf(stderr, "Unknown node-action: %s\n", val);
+                node_ctx.action = NODEACT_UNKNOWN;
+            }
+        } else if (!strcmp(t, "Node-copyfrom-path")) {
+            if (node_ctx.src)
+                free(node_ctx.src);
+            node_ctx.src = strdup(val);
+        } else if (!strcmp(t, "Node-copyfrom-rev")) {
+            node_ctx.srcRev = atoi(val);
+        } else if (!strcmp(t, "Text-content-length")) {
+            node_ctx.textLength = atoi(val);
+        } else if (!strcmp(t, "Prop-content-length")) {
+            node_ctx.propLength = atoi(val);
+        } else if (!strcmp(t, "Content-length")) {
+            len = atoi(val);
+            buffer_read_line();
+            if (active_ctx == REV_CTX) {
+                read_props();
+            } else if (active_ctx == NODE_CTX) {
+                handle_node();
+                active_ctx = REV_CTX;
+            } else {
+                fprintf(stderr, "Unexpected content length header: %d\n", len);
+                buffer_skip_bytes(len);
+            }
+        }
+    } 
+    if (active_ctx != DUMP_CTX) handle_revision();
+}
+
+static void svndump_reset(void)
+{
+    repo_reset();
+    reset_dump_ctx(NULL);
+    reset_rev_ctx(0);
+    reset_node_ctx(NULL);
+}
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
new file mode 100644
index 0000000..b11f666
--- /dev/null
+++ b/vcs-svn/svndump.h
@@ -0,0 +1,7 @@
+#ifndef SVNDUMP_H_
+#define SVNDUMP_H_
+
+static void svndump_read(char *url);
+static void svndump_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] Add buffer pool library
  2010-05-23 21:40 ` [PATCH 3/7] Add buffer pool library Ramkumar Ramachandra
@ 2010-05-24  7:47   ` Peter Baumann
  2010-05-24 10:11     ` Ramkumar Ramachandra
  2010-05-29  8:51   ` Jonathan Nieder
  1 sibling, 1 reply; 29+ messages in thread
From: Peter Baumann @ 2010-05-24  7:47 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

On Sun, May 23, 2010 at 11:40:28PM +0200, Ramkumar Ramachandra wrote:
> line_buffer creates a couple of static buffers and expose an API for
> using them. The idea is to maintain a fixed static memory pool to
> avoid constant allocation and de-allocation of memory. Taken directly
> from David Michael Barr's svn-dump-fast-export repository.
> 

If this is David's code, shouldn't you then preserve authorship?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] Add buffer pool library
  2010-05-24  7:47   ` Peter Baumann
@ 2010-05-24 10:11     ` Ramkumar Ramachandra
  2010-05-24 10:37       ` David Michael Barr
  0 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-24 10:11 UTC (permalink / raw)
  To: Peter Baumann; +Cc: Git Mailing List, David Michael Barr, Sverre Rabbelier

Hi,

On Mon, May 24, 2010 at 9:47 AM, Peter Baumann <waste.manager@gmx.de> wrote:
> If this is David's code, shouldn't you then preserve authorship?

Yeah, I thought about that- can we hand-edit the author since atleast
90% of the code is David's? What are your thoughts on importing some
of the revision history from the original repository?
@David: I'll take the responsibility of cleaning it up/ modifying it
and getting it merged into git.git. You can work on the repository
independently, and I'll pull relevant changes into git.git.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] Add buffer pool library
  2010-05-24 10:11     ` Ramkumar Ramachandra
@ 2010-05-24 10:37       ` David Michael Barr
  0 siblings, 0 replies; 29+ messages in thread
From: David Michael Barr @ 2010-05-24 10:37 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Peter Baumann, Git Mailing List, Sverre Rabbelier

Hi,

> @David: I'll take the responsibility of cleaning it up/ modifying it
> and getting it merged into git.git. You can work on the repository
> independently, and I'll pull relevant changes into git.git.

@Ram: On that note, can you sign off on your patches so I can
merge them.

>> If this is David's code, shouldn't you then preserve authorship?


[PATCH 2/7] Add cpp macro implementation of treaps
Authored mostly by Jason Evans.
Heavily culled and adapted to use the object pool library by
David Barr.

[PATCH 7/7] Add handler for SVN dump
Originally authored by in Java by Stefan Hegny and others.
Ported to C and rewritten by David Barr.

Both released under two clause BSD licenses.

Patches 3 to 6 authored primarily by David Barr with
contributions in the form of cleanup and documentation from
Ramkumar Ramachadra.
Released under two-clause BSD license but easily relicensed
due to proximity of authors.

-- David Barr.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/7] Add cpp macro implementation of treaps
  2010-05-23 21:40 ` [PATCH 2/7] Add cpp macro implementation of treaps Ramkumar Ramachandra
@ 2010-05-29  7:18   ` Jonathan Nieder
  2010-05-30  9:09     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-29  7:18 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr, Jason Evans

Hi Ram,

Ramkumar Ramachandra wrote:

> Taken directly
> from David Michael Barr's svn-dump-fast-export repository.

More details:

  From: Jason Evans <jasone@canonware.com>

  Treaps provide a memory-efficient binary search tree structure.
  Insertion/deletion/search are about as about as fast in the average
  case as red-black trees and the chances of worst-case behavior are
  vanishingly small, thanks to (pseudo-)randomness.  That is a small
  price to pay, given that treaps are much simpler to implement.

  [db: Altered to reference nodes by offset from a common base pointer]
  [db: Bob Jenkins' hashing implementation dropped for Knuth's]
  [db: Methods unnecessary for search and insert dropped]

  Signed-off-by: David Barr <david.barr@cordelta.com>
  Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>

I stole most of the description from [1].

> --- /dev/null
> +++ b/vcs-svn/trp.h
> @@ -0,0 +1,221 @@
> +/******************************************************************************
> + *
> + * cpp macro implementation of treaps.
> + *
> + * Usage:
> + *
> + *   #include <trp.h>
> + *   trp(...)
> + *   trp_gen(...)
> + *   ...
> + *
> + ******************************************************************************/

I don’t know if style nitpicking is welcome here, given that the
code comes from elsewhere.  Should we be trying to keep the code
close to Jason’s version (and sending patches upstream as needed),
or is that not worth the trouble?

If style nitpicking were appropriate, I would say that comments in
C code written specifically for git should look like this:

   /*
    * cpp macro implementation of treaps.
   [...]
    */

Okay. :)

> +
> +#ifndef TRP_H_
> +#define	TRP_H_
> +
> +#include <stdint.h>

#include "../git-compat-util.h" would be more portable.

> +/* Pointer/Offset conversion */
> +#define trpn_pointer(a_base, a_offset)					\
> +    (a_base##_pointer(a_offset))
> +#define trpn_offset(a_base, a_pointer)				        \
> +    (a_base##_offset(a_pointer))

These are defined in obj_pool.h?  Maybe this would be easier to
understand if the patch to add that file came sooner in the series.

> +/* Priority accessors. */
> +#define	trp_prio_get(a_type, a_field, a_node)				\
> +    (2654435761*(uint32_t)(uintptr_t)(a_node))

Where does this magic number come from?  (I assume Knuth, but it
would be nice to include a reference or explanation.)

> +/*
> + * The trp_gen() macro generates a type-specific treap implementation,
> + * based on the above cpp macros.
[...]
> + * Assuming the following setup:
> + *
> + *   typedef struct ex_node_s ex_node_t;
> + *   struct ex_node_s {
> + *       trp_node(ex_node_t) ex_link;
> + *   };
> + *   typedef trp(ex_node_t) ex_t;
> + *   static ex_node_t ex_base[MAX_NODES];
> + *   trp_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_base, ex_cmp)
> + *
> + * The following API is generated:
> + *
> + *   static void
> + *   ex_new(ex_t *treap);
> + *       Description: Initialize a treap structure.
> + *       Args:
> + *         treap: Pointer to an uninitialized treap object.
> + *
> + *   static ex_node_t *
> + *   ex_psearch(ex_t *treap, ex_node_t *key);
> + *       Description: Search for node that matches key.  If no match is found,
> + *                    return what would be key's successor/predecessor, were
> + *                    key in treap.
> + *       Args:
> + *         treap: Pointer to a initialized treap object.
> + *         key  : Search key.
> + *       Ret: Node in treap that matches key, or if no match, hypothetical
> + *            node's successor/predecessor (NULL if no successor/predecessor).
[...]

Neat.  Maybe this description should go in a file in
Documentation/technical, to make trp.h itself a little less daunting.

Also: http://www.canonware.com/trp/ seems to provide a test program;
do you think it would make sense to include it as well?

Thanks,
Jonathan

[1] http://t-t-travails.blogspot.com/2008/07/treaps-versus-red-black-trees.html

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] Add buffer pool library
  2010-05-23 21:40 ` [PATCH 3/7] Add buffer pool library Ramkumar Ramachandra
  2010-05-24  7:47   ` Peter Baumann
@ 2010-05-29  8:51   ` Jonathan Nieder
  2010-05-29 10:55     ` David Michael Barr
  1 sibling, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-29  8:51 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Hi Ram,

Ramkumar Ramachandra wrote:

> line_buffer creates a couple of static buffers and expose an API for
> using them.

So this provides a thread-unsafe fgets() and fread() where the caller
does not have to supply a buffer.  Sounds convenient.

> Taken directly
> from David Michael Barr's svn-dump-fast-export repository.
>
> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>

Missing From: line and sign-off.

[...]
> +char *buffer_read_line(void)
> +{
> +    char *end;

style nitpick: use tabs to indent.

> +    uint32_t n_read;
> +
> +    if (line_len) {
> +        memmove(line_buffer, &line_buffer[line_len],
> +                line_buffer_len - line_len);
> +        line_buffer_len -= line_len;
> +        line_len = 0;
> +    }
> +
> +    end = memchr(line_buffer, '\n', line_buffer_len);
> +    while (line_buffer_len < LINE_BUFFER_LEN - 1 &&
> +           !feof(stdin) && NULL == end) {
> +        n_read =
> +            fread(&line_buffer[line_buffer_len], 1,
> +                  LINE_BUFFER_LEN - 1 - line_buffer_len,
> +                  stdin);
> +        end = memchr(&line_buffer[line_buffer_len], '\n', n_read);
> +        line_buffer_len += n_read;
> +    }

Why not fgets()?

[...]
> +char *buffer_read_string(uint32_t len)
> +{
> +    char *s = malloc(len + 1);
> +    uint32_t offset = 0;
> +    if (line_buffer_len > line_len) {
> +        offset = line_buffer_len - line_len;
> +        if (offset > len)
> +            offset = len;
> +        memcpy(s, &line_buffer[line_len], offset);

So if this buffer library is in use, all input needs to pass through
it?  I would prefer to avoid that if possible.

> +        line_len += offset;
> +    }
> +    while (offset < len && !feof(stdin)) {
> +        offset += fread(&s[offset], 1, len - offset, stdin);
> +    }

On error, wouldn’t this be an infinite loop?  Maybe:

  offset += fread(&s[offset], 1, len - offset, stdin);
  if (ferror(stdin)) {
	free(s);
	return NULL;
 }

One iteration should be sufficient, since fread loops internally.

[...]
> +void buffer_copy_bytes(uint32_t len)
> +{
> +    uint32_t in, out;
> +    if (line_buffer_len > line_len) {
> +        in = line_buffer_len - line_len;
> +        if (in > len)
> +            in = len;
> +        out = 0;
> +        while (out < in && !ferror(stdout)) {
> +            out +=
> +                fwrite(&line_buffer[line_len + out], 1, in - out, stdout);
> +        }

Likewise.

> +        len -= in;
> +        line_len += in;
> +    }
> +    while (len > 0 && !feof(stdin)) {
> +        in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
> +        in = fread(byte_buffer, 1, in, stdin);
> +        len -= in;
> +        out = 0;
> +        while (out < in && !ferror(stdout)) {
> +            out += fwrite(&byte_buffer[out], 1, in - out, stdout);

Likewise.

Why isn’t line_buffer used here?

[...]
> +void buffer_skip_bytes(uint32_t len)
> +{
[...]
> +    while (len > 0 && !feof(stdin)) {
> +        in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
> +        in = fread(byte_buffer, 1, in, stdin);
> +        len -= in;
> +    }

Likewise.

Too bad stdio does not supply a function for this (fseek is the
closest I can find, and it does not work on unseekable files).

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Add a memory pool library
  2010-05-23 21:40 ` [PATCH 4/7] Add a memory " Ramkumar Ramachandra
@ 2010-05-29  9:06   ` Jonathan Nieder
  2010-05-30  9:12     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-29  9:06 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Ramkumar Ramachandra wrote:

> Add a memory pool library implemented using cpp macros.

Hmm, like talloc[1].  How do they compare?

[...]
> + *   pre              : Prefix for generated functions (ex: string_).
> + *   obj_t            : Type for treap data structure (ex: char).
> + *   intial_capacity  : The initial size of the memory pool (ex: 4096).
> + *
> + */
> +#define obj_pool_gen(pre, obj_t, initial_capacity)                         \
> +static struct {                                                            \
> +    uint32_t size;                                                         \
> +    uint32_t capacity;                                                     \
> +    obj_t *base;                                                           \
> +} pre##_pool = { 0, 0, NULL};                                              \
> +static uint32_t pre##_alloc(uint32_t count)                                \
[...]

This makes functions with names like string_alloc(), which would then
be un-greppable.  It might be nice (though certainly not necessary) to
provide an overview in Documentation/technical/ so there is no need to
find the definition of each function.

Jonathan

[1] http://ccan.ozlabs.org/info/talloc.html

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/7] Add buffer pool library
  2010-05-29  8:51   ` Jonathan Nieder
@ 2010-05-29 10:55     ` David Michael Barr
  0 siblings, 0 replies; 29+ messages in thread
From: David Michael Barr @ 2010-05-29 10:55 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Ramkumar Ramachandra, Git Mailing List

Ji Jonathan,

>> line_buffer creates a couple of static buffers and expose an API for
>> using them.
> 
> So this provides a thread-unsafe fgets() and fread() where the caller
> does not have to supply a buffer.  Sounds convenient.

Very convenient indeed. Its primary reason for existence is to assist
debugging the svn dump parser.

> Missing From: line and sign-off.

I'm sure Ram will fix this up in the next rebase.

>> +char *buffer_read_line(void)
>> +{
>> +    char *end;
> 
> style nitpick: use tabs to indent.

Will read git style guide and try to match for convenience.

> Why not fgets()?

Historical reasons, can be factored out.

> So if this buffer library is in use, all input needs to pass through
> it?  I would prefer to avoid that if possible.

The design is almost single-caller anyway...

>> +        line_len += offset;
>> +    }
>> +    while (offset < len && !feof(stdin)) {
>> +        offset += fread(&s[offset], 1, len - offset, stdin);
>> +    }
> 
> On error, wouldn’t this be an infinite loop?  Maybe:
> 
>  offset += fread(&s[offset], 1, len - offset, stdin);
>  if (ferror(stdin)) {
> 	free(s);
> 	return NULL;
> }
> 
> One iteration should be sufficient, since fread loops internally

Thanks for the tip.

--
David Barr

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/7] Add API for string-specific memory pool
  2010-05-23 21:40 ` [PATCH 5/7] Add API for string-specific memory pool Ramkumar Ramachandra
@ 2010-05-29 11:38   ` Jonathan Nieder
  2010-05-30  9:38     ` Ramkumar Ramachandra
  2010-05-30 16:52     ` Ramkumar Ramachandra
  0 siblings, 2 replies; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-29 11:38 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Ramkumar Ramachandra wrote:

> string_pool uses the macros in the memory pool library to create a
> memory pool for strings and expose an API for handling the
> strings.

This interns strings so they can be compared by address.  Interesting.
The use of offsets instead of pointers here mean it is possible to
back the pool by a file, if I understand correctly.

> Taken directly from David Michael Barr's svn-dump-fast-export
> repository.

Missing From: and sign-off.

[...]
> +static int node_indentity_cmp(node_t *a, node_t *b)
> +{
> +    int r = node_value_cmp(a, b);
> +    return r ? r : (((uintptr_t) a) > ((uintptr_t) b))
> +        - (((uintptr_t) a) < ((uintptr_t) b));

nitpick: could use fewer parentheses.

	return r ? r : (((uintptr_t) a > (uintptr_t) b) - ...

Are the casts to uintptr_t necessary?  C99 says, under "Relational
operators", paragraph 5:

  When two pointers are compared, the result depends on the relative
  locations in the address space of the objects pointed to. If two
  pointers to object or incomplete types both point to the same
  object, or both point one past the last element of the same array
  object, they compare equal. If the objects pointed to are members of
  the same aggregate object,
  [etc]

which seems to suggest that no, simple expressions like (a > b) should
be okay, but then it finishes with

  In all other cases, the behavior is undefined.

so I guess it is safer to keep the casts.

> diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
> new file mode 100644
> index 0000000..fb9e6b8
> --- /dev/null
> +++ b/vcs-svn/string_pool.h
> @@ -0,0 +1,11 @@
> +#ifndef STRING_POOL_H_
> +#define	STRING_POOL_H_

style nitpick: should use space instead of tab

> +
> +#include <stdint.h>
> +#include <stdio.h>

Should use "../git-compat-util.h" for portability: unfortunately, some
platforms git supports still don’t have stdint.h iirc.

Except as noted above,

  Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

Thanks for the pleasant read.
Jonathan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/7] Add SVN revision parser and exporter
  2010-05-23 21:40 ` [PATCH 6/7] Add SVN revision parser and exporter Ramkumar Ramachandra
@ 2010-05-29 14:06   ` Jonathan Nieder
  2010-05-30 15:58     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-29 14:06 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Hi again,

Ramkumar Ramachandra wrote:

> repo_tree parses SVN revisions to build a Git objects, and use
> fast_export to emit them so they can be imported into the Git object
> store via a fast-import.

Wait, where are SVN revisions being parsed?  It seems that the repo
module maintains the exporter's state and provides a facility to
to call the fast_export module to write it out.

  repo_add(path, mode, blob_mark) is used to add a new file to
  the current commit.

  repo_modify is used to add a replacement for an existing file;
  it is implemented exactly the same way, but a check could be
  added later to distinguish the two cases.

  repo_copy copies a blob from a previous revision to the current
  commit.

  repo_replace modifies the content of a file from the current
  commit, if and only if it already exists.

  repo_delete removes a file or directory from the current commit.

  repo_commit calls out to fast_export to write the current commit
  to the fast-import stream in stdout.

  repo_diff is used by the fast_export module to write the changes
  for a commit.

  repo_reset erases the exporter's state, so valgrind can be happy.

> +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
> +                        uint32_t mark)
> +{
> +    printf("M %06o :%d ", mode, mark);
> +    pool_print_seq(depth, path, '/', stdout);
> +    putchar('\n');
> +}

Mode must be 100644, 100755, 120000, or 160000.

[...]
> +typedef struct repo_dirent_s repo_dirent_t;
> +
> +struct repo_dirent_s {
> +    uint32_t name_offset;
> +    uint32_t mode;
> +    uint32_t content_offset;
> +};
> +
> +typedef struct repo_dir_s repo_dir_t;
> +
> +struct repo_dir_s {
> +    uint32_t size;
> +    uint32_t first_offset;
> +};
> +
> +typedef struct repo_commit_s repo_commit_t;
> +
> +struct repo_commit_s {
> +    uint32_t mark;
> +    uint32_t root_dir_offset;
> +};

Style: we don’t tend to use typedef to hide underlying struct definitions
(see Documentation/CodingStyle from linux-2.6.git, chapter 5, for some
explanation about why).

> +static uint32_t num_dirs_saved = 0;
> +static uint32_t num_dirents_saved = 0;

Style: leave out the initializer for bss-allocated variables.

> +static int repo_dirent_name_cmp(const void *a, const void *b)
> +{
> +    return (((repo_dirent_t *) a)->name_offset
> +            > ((repo_dirent_t *) b)->name_offset) -
> +        (((repo_dirent_t *) a)->name_offset
> +         < ((repo_dirent_t *) b)->name_offset);
> +}

Maybe some local variables would make this more readable:

 {
	const repo_dirent_t *dirent1 = a, *dirent2 = b;
	uint32_t a_offset = dirent1->name_offset;
	uint32_t b_offset = dirent2->name_offset;

	return (a_offset > b_offset) - (a_offset < b_offset);
 }

> +static repo_dir_t *repo_clone_dir(repo_dir_t *orig_dir, uint32_t padding)
> +{
> +    uint32_t orig_o, new_o, dirent_o;
> +    orig_o = dir_offset(orig_dir);
> +    if (orig_o < num_dirs_saved) {
> +        new_o = dir_with_dirents_alloc(orig_dir->size + padding);
> +        orig_dir = dir_pointer(orig_o);
> +        dirent_o = dir_pointer(new_o)->first_offset;
> +    } else {
> +        if (padding == 0)
> +            return orig_dir;
> +        new_o = orig_o;
> +        dirent_o = dirent_alloc(orig_dir->size + padding);
> +    }
> +    memcpy(dirent_pointer(dirent_o), repo_first_dirent(orig_dir),
> +           orig_dir->size * sizeof(repo_dirent_t));
> +    dir_pointer(new_o)->size = orig_dir->size + padding;
> +    dir_pointer(new_o)->first_offset = dirent_o;
> +    return dir_pointer(new_o);
> +}

Is this for adding new entries to an existing directory?  It is
getting late, so I did not look it over carefully.

> +static char repo_path_buffer[REPO_MAX_PATH_LEN];
> +static repo_dirent_t *repo_read_dirent(uint32_t revision, char *path)
> +{
> +    char *ctx = NULL;
> +    uint32_t name = 0;
> +    repo_dir_t *dir = NULL;
> +    repo_dirent_t *dirent = NULL;
> +    dir = repo_commit_root_dir(commit_pointer(revision));
> +    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
> +    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
> +    path = repo_path_buffer;
> +    for (name = pool_tok_r(path, "/", &ctx);
> +         ~name; name = pool_tok_r(NULL, "/", &ctx)) {
> +        dirent = repo_dirent_by_name(dir, name);
> +        if (dirent == NULL) {
> +            return NULL;
> +        } else if (repo_dirent_is_dir(dirent)) {
> +            dir = repo_dir_from_dirent(dirent);
> +        } else {
> +            break;
> +        }
> +    }
> +    return dirent;
> +}

If a file "foo" exists and I ask for "foo/bar", this will return
the entry for foo.  Is that appropriate?

> +static void
> +repo_write_dirent(char *path, uint32_t mode, uint32_t content_offset,
> +                  uint32_t del)
> +{
> +    char *ctx;
> +    uint32_t name, revision, dirent_o, dir_o, parent_dir_o;
> +    repo_dir_t *dir;
> +    repo_dirent_t *dirent = NULL;
> +    revision = active_commit;
> +    dir = repo_commit_root_dir(commit_pointer(revision));
> +    dir = repo_clone_dir(dir, 0);
> +    commit_pointer(revision)->root_dir_offset = dir_offset(dir);
> +    strncpy(repo_path_buffer, path, REPO_MAX_PATH_LEN);
> +    repo_path_buffer[REPO_MAX_PATH_LEN - 1] = '\0';
> +    path = repo_path_buffer;
> +    for (name = pool_tok_r(path, "/", &ctx); ~name;
> +         name = pool_tok_r(NULL, "/", &ctx)) {
> +        parent_dir_o = dir_offset(dir);
> +        dirent = repo_dirent_by_name(dir, name);
> +        if (dirent == NULL) {

static repo_dir_t *add_subdir(uint32_t parent_dir_o, repo_dir_t *dir,
                              uint32_t name)
{

> +            dir = repo_clone_dir(dir, 1);
> +            dirent = &repo_first_dirent(dir)[dir->size - 1];
> +            dirent->name_offset = name;
> +            dirent->mode = REPO_MODE_DIR;
> +            qsort(repo_first_dirent(dir), dir->size,
> +                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
> +            dirent = repo_dirent_by_name(dir, name);
> +            dir_o = dir_with_dirents_alloc(0);
> +            dirent->content_offset = dir_o;
> +            dir = dir_pointer(dir_o);

}

> +        } else if ((dir = repo_dir_from_dirent(dirent))) {
> +            dirent_o = dirent_offset(dirent);
> +            dir = repo_clone_dir(dir, 0);
> +            if (dirent_o != ~0)
> +                dirent_pointer(dirent_o)->content_offset = dir_offset(dir);
> +        } else {

static repo_dir_t *replace_with_empty_dir(uint32_t dirent)
{

> +            dirent->mode = REPO_MODE_DIR;
> +            dirent_o = dirent_offset(dirent);
> +            dir_o = dir_with_dirents_alloc(0);
> +            dirent = dirent_pointer(dirent_o);
> +            dir = dir_pointer(dir_o);
> +            dirent->content_offset = dir_o;

}

> +        }
> +    }
> +    if (dirent) {

When would it be NULL?  Is an empty path allowed?

static void fill_dirent(repo_dirent_t *dirent,
                        uint32_t mode, uint32_t content_offset,
                        uint32_t parent_dir_o, int del)
{

> +        dirent->mode = mode;
> +        dirent->content_offset = content_offset;
> +        if (del && ~parent_dir_o) {
> +            dirent->name_offset = ~0;
> +            dir = dir_pointer(parent_dir_o);
> +            qsort(repo_first_dirent(dir), dir->size,
> +                  sizeof(repo_dirent_t), repo_dirent_name_cmp);
> +            dir->size--;
> +        }

}

> +    }
> +}
[...]

> +static void
> +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir);
> +
> +static void
> +repo_git_add(uint32_t depth, uint32_t *path, repo_dirent_t *dirent)
> +{
> +    if (repo_dirent_is_dir(dirent)) {
> +        repo_git_add_r(depth, path, repo_dir_from_dirent(dirent));
> +    } else {
> +        fast_export_modify(depth, path, dirent->mode, dirent->content_offset);
> +    }
> +}

Just curious: does gcc notice the tail calls?

> +static void
> +repo_git_add_r(uint32_t depth, uint32_t *path, repo_dir_t *dir)
> +{
> +    uint32_t o;
> +    repo_dirent_t *de;
> +    de = repo_first_dirent(dir);
> +    for (o = 0; o < dir->size; o++) {
> +        path[depth] = de[o].name_offset;
> +        repo_git_add(depth + 1, path, &de[o]);
> +    }
> +}

Can this overflow the path stack?

> +
> +static void
> +repo_diff_r(uint32_t depth, uint32_t *path, repo_dir_t *dir1,
> +            repo_dir_t *dir2)
> +{
> +    repo_dirent_t *de1, *de2, *max_de1, *max_de2;
> +    de1 = repo_first_dirent(dir1);
> +    de2 = repo_first_dirent(dir2);
> +    max_de1 = &de1[dir1->size];
> +    max_de2 = &de2[dir2->size];
> +
> +    while (de1 < max_de1 && de2 < max_de2) {
> +        if (de1->name_offset < de2->name_offset) {
> +            path[depth] = (de1++)->name_offset;
> +            fast_export_delete(depth + 1, path);
> +        } else if (de1->name_offset == de2->name_offset) {
> +            path[depth] = de1->name_offset;
> +            if (de1->content_offset != de2->content_offset) {
> +                if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) {
> +                    repo_diff_r(depth + 1, path,
> +                                repo_dir_from_dirent(de1),
> +                                repo_dir_from_dirent(de2));
> +                } else {
> +                    if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) {
> +                        fast_export_delete(depth + 1, path);
> +                    }
> +                    repo_git_add(depth + 1, path, de2);
> +                }
> +            }
> +            de1++;
> +            de2++;
> +        } else {
> +            path[depth] = de2->name_offset;
> +            repo_git_add(depth + 1, path, de2++);
> +        }

Might be easier to read the other way around:

		if (de1->name_offset < de2->name_offset) {
			path[depth] = (de1++)->name_offset;
			fast_export_delete(depth + 1, path);
			continue;
		}
		if (de1->name_offset > de2->name_offset) {
			path[depth] = de2->name_offset;
			repo_git_add(depth + 1, path, de2++);
			continue;
		}
		... matching paths case ...

> +    }
> +    while (de1 < max_de1) {
> +        path[depth] = (de1++)->name_offset;
> +        fast_export_delete(depth + 1, path);
> +    }
> +    while (de2 < max_de2) {
> +        path[depth] = de2->name_offset;
> +        repo_git_add(depth + 1, path, de2++);
> +    }
> +}
[...]

> +void repo_commit(uint32_t revision, char *author, char *log, char *uuid,
> +                 char *url, time_t timestamp)
> +{
> +    if (revision == 0) {
> +        active_commit = commit_alloc(1);
> +        commit_pointer(active_commit)->root_dir_offset =
> +            dir_with_dirents_alloc(0);
> +    } else {
> +        fast_export_commit(revision, author, log, uuid, url, timestamp);
> +    }
> +    num_dirs_saved = dir_pool.size;
> +    num_dirents_saved = dirent_pool.size;

I did not carefully trace the cases where repo_clone_dir may reuse a
dir.  I would be happier if someone else does.

The rest looked good to me, for what it’s worth.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Add handler for SVN dump
  2010-05-23 21:40 ` [PATCH 7/7] Add handler for SVN dump Ramkumar Ramachandra
@ 2010-05-30  8:59   ` Jonathan Nieder
  2010-05-30 10:45     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-30  8:59 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Ramkumar Ramachandra wrote:

> Expose an API to parse an SVN dump created with `svnadmin dump` and
> emit a fast-import stream using fast_export and repo_tree. This is a
> temporary workaround: it will be replaced with a client that
> communicates live with a remote SVN server in future.

Hmm.  I suspect the svndump-to-fastimport converter might be useful as
a sort of swiss army knife even after remote-svn moves on to use
the ra lib.  What would you think about putting a main() function to
build the standalone program under contrib/fast-import?

I can write a patch for it for next round if you like the idea.

Jonathan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/7] Add cpp macro implementation of treaps
  2010-05-29  7:18   ` Jonathan Nieder
@ 2010-05-30  9:09     ` Ramkumar Ramachandra
  2010-05-30  9:31       ` Jonathan Nieder
  0 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30  9:09 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr, Jason Evans

Hi Jonathan,

Jonathan Nieder <jrnieder@gmail.com> wrote:
>  From: Jason Evans <jasone@canonware.com>
>
>  Treaps provide a memory-efficient binary search tree structure.
>  Insertion/deletion/search are about as about as fast in the average
>  case as red-black trees and the chances of worst-case behavior are
>  vanishingly small, thanks to (pseudo-)randomness.  That is a small
>  price to pay, given that treaps are much simpler to implement.
>
>  [db: Altered to reference nodes by offset from a common base pointer]
>  [db: Bob Jenkins' hashing implementation dropped for Knuth's]
>  [db: Methods unnecessary for search and insert dropped]
>
>  Signed-off-by: David Barr <david.barr@cordelta.com>
>  Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>

Thanks. This can go into the commit message when I re-send the series.

> I don’t know if style nitpicking is welcome here, given that the
> code comes from elsewhere.  Should we be trying to keep the code
> close to Jason’s version (and sending patches upstream as needed),
> or is that not worth the trouble?

In my opinion, style nitpicks are welcome: it should conform to the
git.git style. I'm anyway maintaining a separate branch of master just
for changes specific to the git.git merge- Jason and David are welcome
to pull patches from there.

> #include "../git-compat-util.h" would be more portable.

Right, this should be one major change in all the files in the git-merge branch.

> These are defined in obj_pool.h?  Maybe this would be easier to
> understand if the patch to add that file came sooner in the series.

Okay. Note that in the revision history, trp.h was first imported, and
then culled to use obj_pool.h by David.

> Where does this magic number come from?  (I assume Knuth, but it
> would be nice to include a reference or explanation.)

Indeed. 2654435761 is the golden ratio number corresponding with 2^32.
See http://bit.ly/cwAfD4

> Neat.  Maybe this description should go in a file in
> Documentation/technical, to make trp.h itself a little less daunting.

Okay, I'll put it in a separate file then.

> Also: http://www.canonware.com/trp/ seems to provide a test program;
> do you think it would make sense to include it as well?

Probably in documentation/technical?

> [1] http://t-t-travails.blogspot.com/2008/07/treaps-versus-red-black-trees.html

This is a good read, thanks.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Add a memory pool library
  2010-05-29  9:06   ` Jonathan Nieder
@ 2010-05-30  9:12     ` Ramkumar Ramachandra
  2010-05-30  9:55       ` Jonathan Nieder
  0 siblings, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30  9:12 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi Jonathan,

Jonathan Nieder <jrnieder@gmail.com> wrote:
> Hmm, like talloc[1].  How do they compare?

I've never used talloc. What basis can we use for the comparison?

> This makes functions with names like string_alloc(), which would then
> be un-greppable.  It might be nice (though certainly not necessary) to
> provide an overview in Documentation/technical/ so there is no need to
> find the definition of each function.

Definitely. On a related note, note that while compiling, a lot of
unused functions are generated which pop up as compiler warnings- I
haven't figured out how to suppress them yet, and I suspect Junio
won't be happy with them either.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/7] Add cpp macro implementation of treaps
  2010-05-30  9:09     ` Ramkumar Ramachandra
@ 2010-05-30  9:31       ` Jonathan Nieder
  2010-05-30  9:33         ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-30  9:31 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr, Jason Evans

Ramkumar Ramachandra wrote:
> Jonathan Nieder <jrnieder@gmail.com> wrote:

>> Also: http://www.canonware.com/trp/ seems to provide a test program;
>> do you think it would make sense to include it as well?
>
> Probably in documentation/technical?

I was thinking in the toplevel, but I wasn’t thinking about how much
of trp.h was removed.

Of course svn-fe itself is a pretty good test that the treap is
working as it should.  So I wouldn’t worry about it.

Jonathan

 Makefile               |    1 +
 t/t0070-fundamental.sh |    4 ++++
 test-treap.c           |    8 ++++++++
 3 files changed, 13 insertions(+), 0 deletions(-)
 create mode 100644 test-treap.c

diff --git a/Makefile b/Makefile
index 2f5d631..a26bceb 100644
--- a/Makefile
+++ b/Makefile
@@ -403,6 +403,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-treap
 TEST_PROGRAMS_NEED_X += test-index-version
 
 TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh
index 680d7d6..7522bdc 100755
--- a/t/t0070-fundamental.sh
+++ b/t/t0070-fundamental.sh
@@ -12,4 +12,8 @@ test_expect_success 'character classes (isspace, isalpha etc.)' '
 	test-ctype
 '
 
+test_expect_success 'treap data structure' '
+	test-treap
+'
+
 test_done
diff --git a/test-treap.c b/test-treap.c
new file mode 100644
index 0000000..f94a931
--- /dev/null
+++ b/test-treap.c
@@ -0,0 +1,8 @@
+#include "git-compat-util.h"
+#include "vcs-svn/trp.h"
+#include "vcs-svn/obj_pool.h"
+
+#define NNODES 32
+#define NSETS 200
+
+tests go here
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/7] Add cpp macro implementation of treaps
  2010-05-30  9:31       ` Jonathan Nieder
@ 2010-05-30  9:33         ` Ramkumar Ramachandra
  0 siblings, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30  9:33 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr, Jason Evans

Hi Jonathan,

Jonathan Nieder <jrnieder@gmail.com> wrote:
>  Makefile               |    1 +
>  t/t0070-fundamental.sh |    4 ++++
>  test-treap.c           |    8 ++++++++
>  3 files changed, 13 insertions(+), 0 deletions(-)
>  create mode 100644 test-treap.c
>
> diff --git a/Makefile b/Makefile
> index 2f5d631..a26bceb 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -403,6 +403,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
>  TEST_PROGRAMS_NEED_X += test-run-command
>  TEST_PROGRAMS_NEED_X += test-sha1
>  TEST_PROGRAMS_NEED_X += test-sigchain
> +TEST_PROGRAMS_NEED_X += test-treap
>  TEST_PROGRAMS_NEED_X += test-index-version
>
>  TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
> diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh
> index 680d7d6..7522bdc 100755
> --- a/t/t0070-fundamental.sh
> +++ b/t/t0070-fundamental.sh
> @@ -12,4 +12,8 @@ test_expect_success 'character classes (isspace, isalpha etc.)' '
>        test-ctype
>  '
>
> +test_expect_success 'treap data structure' '
> +       test-treap
> +'
> +
>  test_done
> diff --git a/test-treap.c b/test-treap.c
> new file mode 100644
> index 0000000..f94a931
> --- /dev/null
> +++ b/test-treap.c
> @@ -0,0 +1,8 @@
> +#include "git-compat-util.h"
> +#include "vcs-svn/trp.h"
> +#include "vcs-svn/obj_pool.h"
> +
> +#define NNODES 32
> +#define NSETS 200
> +
> +tests go here
> --
> 1.7.1

Thanks for the template. I'll start writing tests right away.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/7] Add API for string-specific memory pool
  2010-05-29 11:38   ` Jonathan Nieder
@ 2010-05-30  9:38     ` Ramkumar Ramachandra
  2010-05-30 10:09       ` Jonathan Nieder
  2010-05-30 16:52     ` Ramkumar Ramachandra
  1 sibling, 1 reply; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30  9:38 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi Jonathan,

Jonathan Nieder wrote:
> This interns strings so they can be compared by address.  Interesting.
> The use of offsets instead of pointers here mean it is possible to
> back the pool by a file, if I understand correctly.

Exactly. The future plan is to use file backing to support incremental dumps.

> Missing From: and sign-off.

Right, I'll fix this in all the patches in the series in the re-send.

> nitpick: could use fewer parentheses.

Ok.

> so I guess it is safer to keep the casts.

Right. I don't want compiler warnings either- Sverre and I had to work
quite hard to eliminate many of them (commit seen in master now).

> style nitpick: should use space instead of tab

Quick doubt: Does running whitespace-cleanup (Emacs) on all the files
before generating patches suffice?

> Should use "../git-compat-util.h" for portability: unfortunately, some
> platforms git supports still don’t have stdint.h iirc.

Oh. What do we do about stdint.h then?

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Add a memory pool library
  2010-05-30  9:12     ` Ramkumar Ramachandra
@ 2010-05-30  9:55       ` Jonathan Nieder
  2010-05-30 10:51         ` Ramkumar Ramachandra
  0 siblings, 1 reply; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-30  9:55 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Hi Ram,

Ramkumar Ramachandra wrote:

> I've never used talloc. What basis can we use for the comparison?

Ah, a little explanation is in order.

You see, I read the commit message, having just looked at the treap
implementation, read the word “allocator”, and decided that a
mini-malloc was exactly the sort of thing I did not want to read right
then.  Hence the silly comment about some allocator that pools memory
for heterogeneous-sized blocks (which is very nice and convenient, but
not so relevant!).  Sorry --- I should not have been so lazy.

In fact, this patch just maintains a growing array of never-freed
fixed-size blocks.  It is very similar to the existing alloc.c but
with some small differences:

 . uses a single memory area that grows with realloc, so it can be
   freed for valgrind-cleanness if one wants.

 . grows a little more aggressively, as David pointed out

And it is much simpler than I was thinking, so honestly, I don’t
even mind the code duplication so much.

For interest, the analogous CCAN module is
http://ccan.ozlabs.org/info/block_pool.html

> On a related note, note that while compiling, a lot of
> unused functions are generated which pop up as compiler warnings

Maybe __attribute__((__unused__)) could work.

Sorry for the nonsense,
Jonathan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/7] Add API for string-specific memory pool
  2010-05-30  9:38     ` Ramkumar Ramachandra
@ 2010-05-30 10:09       ` Jonathan Nieder
  0 siblings, 0 replies; 29+ messages in thread
From: Jonathan Nieder @ 2010-05-30 10:09 UTC (permalink / raw)
  To: Ramkumar Ramachandra; +Cc: Git Mailing List, David Michael Barr

Ramkumar Ramachandra wrote:
> Jonathan Nieder wrote:

>> style nitpick: should use space instead of tab
> 
> Quick doubt: Does running whitespace-cleanup (Emacs) on all the files
> before generating patches suffice?

No idea.  Documentation/CodingStyle in linux-2.6 has some emacs tips.

>> some platforms git supports still don’t have stdint.h iirc.
>
> Oh. What do we do about stdint.h then?

Luckily, the systems without stdint.h have inttypes.h (and vice
versa[1]), so git-compat-util.h takes care of it.

Jonathan

[1] inttypes.h is the older standard, and almost all systems support
it, but Interix has stdint.h but not inttypes.h.  So it goes.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 7/7] Add handler for SVN dump
  2010-05-30  8:59   ` Jonathan Nieder
@ 2010-05-30 10:45     ` Ramkumar Ramachandra
  0 siblings, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30 10:45 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi,

Jonathan Nieder wrote:
> Hmm.  I suspect the svndump-to-fastimport converter might be useful as
> a sort of swiss army knife even after remote-svn moves on to use
> the ra lib.  What would you think about putting a main() function to
> build the standalone program under contrib/fast-import?

Yes, I like the idea. I'll just sort out a few things in my GitHub
fork and tell you when something's ready.

> I can write a patch for it for next round if you like the idea.

Um, I'd suggest waiting a little bit- my fork's in a bit of a mess
with all these changes from your reviews, and I don't want to merge in
more changes yet :p

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/7] Add a memory pool library
  2010-05-30  9:55       ` Jonathan Nieder
@ 2010-05-30 10:51         ` Ramkumar Ramachandra
  0 siblings, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30 10:51 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi Jonathan,

Jonathan Nieder wrote:
> And it is much simpler than I was thinking, so honestly, I don’t
> even mind the code duplication so much.

Ok, we can keep this code then.

> Maybe __attribute__((__unused__)) could work.

I thought of exactly that, but it needs to be used at declaration time
which doesn't seem to be possible since the function is declared by
cpp macros.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 6/7] Add SVN revision parser and exporter
  2010-05-29 14:06   ` Jonathan Nieder
@ 2010-05-30 15:58     ` Ramkumar Ramachandra
  0 siblings, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30 15:58 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi Jonathan,

Jonathan Nieder wrote:
> Wait, where are SVN revisions being parsed?  It seems that the repo
> module maintains the exporter's state and provides a facility to
> to call the fast_export module to write it out.

Right. Sorry about the bad commit message.

>  repo_add(path, mode, blob_mark) is used to add a new file to
>  the current commit.
>
>  repo_modify is used to add a replacement for an existing file;
>  it is implemented exactly the same way, but a check could be
>  added later to distinguish the two cases.
>
>  repo_copy copies a blob from a previous revision to the current
>  commit.
>
>  repo_replace modifies the content of a file from the current
>  commit, if and only if it already exists.
>
>  repo_delete removes a file or directory from the current commit.
>
>  repo_commit calls out to fast_export to write the current commit
>  to the fast-import stream in stdout.
>
>  repo_diff is used by the fast_export module to write the changes
>  for a commit.
>
>  repo_reset erases the exporter's state, so valgrind can be happy.

Thanks for the nice descriptions. Will be useful while preparing
Documentation/technical.

> Mode must be 100644, 100755, 120000, or 160000.

Okay, do we put in an assert() for sanity?

> Style: we don’t tend to use typedef to hide underlying struct definitions
> (see Documentation/CodingStyle from linux-2.6.git, chapter 5, for some
> explanation about why).

Fixed.

> Maybe some local variables would make this more readable:

Fixed.

> Is this for adding new entries to an existing directory?  It is
> getting late, so I did not look it over carefully.

For David.

> If a file "foo" exists and I ask for "foo/bar", this will return
> the entry for foo.  Is that appropriate?

For David.

> When would it be NULL?  Is an empty path allowed?

For David.

> Can this overflow the path stack?

Actually, this has already been changed- see the latest commits to see
how path is allocated.

> Might be easier to read the other way around:

Fixed.

> I did not carefully trace the cases where repo_clone_dir may reuse a
> dir.  I would be happier if someone else does.

For David.

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 5/7] Add API for string-specific memory pool
  2010-05-29 11:38   ` Jonathan Nieder
  2010-05-30  9:38     ` Ramkumar Ramachandra
@ 2010-05-30 16:52     ` Ramkumar Ramachandra
  1 sibling, 0 replies; 29+ messages in thread
From: Ramkumar Ramachandra @ 2010-05-30 16:52 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Git Mailing List, David Michael Barr

Hi again,

Jonathan Nieder wrote:
>> +static int node_indentity_cmp(node_t *a, node_t *b)
>> +{
>> +    int r = node_value_cmp(a, b);
>> +    return r ? r : (((uintptr_t) a) > ((uintptr_t) b))
>> +        - (((uintptr_t) a) < ((uintptr_t) b));
>
> nitpick: could use fewer parentheses.
>
>        return r ? r : (((uintptr_t) a > (uintptr_t) b) - ...

Actually, this triggers a compiler error on GCC:
string_pool.c:34: warning: comparisons like ‘X<=Y<=Z’ do not have
their mathematical meaning

-- Ram

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2010-05-30 16:53 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-23 21:40 [PATCH 0/7] Import David's SVN exporter Ramkumar Ramachandra
2010-05-23 21:40 ` [WIP PATCH 1/7] Add skeleton remote helper for SVN Ramkumar Ramachandra
2010-05-23 21:40 ` [PATCH 2/7] Add cpp macro implementation of treaps Ramkumar Ramachandra
2010-05-29  7:18   ` Jonathan Nieder
2010-05-30  9:09     ` Ramkumar Ramachandra
2010-05-30  9:31       ` Jonathan Nieder
2010-05-30  9:33         ` Ramkumar Ramachandra
2010-05-23 21:40 ` [PATCH 3/7] Add buffer pool library Ramkumar Ramachandra
2010-05-24  7:47   ` Peter Baumann
2010-05-24 10:11     ` Ramkumar Ramachandra
2010-05-24 10:37       ` David Michael Barr
2010-05-29  8:51   ` Jonathan Nieder
2010-05-29 10:55     ` David Michael Barr
2010-05-23 21:40 ` [PATCH 4/7] Add a memory " Ramkumar Ramachandra
2010-05-29  9:06   ` Jonathan Nieder
2010-05-30  9:12     ` Ramkumar Ramachandra
2010-05-30  9:55       ` Jonathan Nieder
2010-05-30 10:51         ` Ramkumar Ramachandra
2010-05-23 21:40 ` [PATCH 5/7] Add API for string-specific memory pool Ramkumar Ramachandra
2010-05-29 11:38   ` Jonathan Nieder
2010-05-30  9:38     ` Ramkumar Ramachandra
2010-05-30 10:09       ` Jonathan Nieder
2010-05-30 16:52     ` Ramkumar Ramachandra
2010-05-23 21:40 ` [PATCH 6/7] Add SVN revision parser and exporter Ramkumar Ramachandra
2010-05-29 14:06   ` Jonathan Nieder
2010-05-30 15:58     ` Ramkumar Ramachandra
2010-05-23 21:40 ` [PATCH 7/7] Add handler for SVN dump Ramkumar Ramachandra
2010-05-30  8:59   ` Jonathan Nieder
2010-05-30 10:45     ` Ramkumar Ramachandra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.