All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] streaming conversion API
@ 2011-05-21  6:58 Junio C Hamano
  2011-05-21  6:58 ` [PATCH] convert.h: move declarations for conversion from cache.h Junio C Hamano
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-21  6:58 UTC (permalink / raw)
  To: git

This builds on top of jc/streaming topic, in which I earlier hinted that a
filtering mechanism that does not need to slurp everything in core can
later be plugged into the git_istream API. This topioc does exactly that.

The first patch moves declarations related to conversion from cache.h to
its own convert.h header file, so that new streaming conversion API can be
added to it without bloating cache.h even more.

The second patch defines the streaming filter API, adds a pass-through
"null" filter, plugs the streaming filter API to the git_istream API and
updates the write out codepath to use it.  The design of this API is an
object-oriented one that is similar to the istream API. The user of the
git_istream API first grabs "stream_filter" object based on the path, and
passes that filter object when creating a git_istream. The data read from
the underlying object is fed to the filter, and the output from the filter
is returned to the caller of the read_istream().

The third patch adds a non-guessing LF-to-CRLF streaming filter.

The fourth patch implements the ident filter. As both LF-to-CRLF and ident
filters can be active at the same time, it also introduces the "cascade"
mechanism that takes two filters, plugs one's output to the other's input,
and feeds the former and returns what is read from the latter.

I suspect that "smudge" filter should be relatively easy to add and
combine with the existing filters using the same cascade mechanism,
but I'll leave it as an exercise to interested readers.

Junio C Hamano (4):
  convert.h: move declarations for conversion from cache.h
  Add streaming filter API
  Add LF-to-CRLF streaming conversion
  streaming filter: ident filter and filter cascading

 cache.h     |   38 +------
 convert.c   |  363 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 convert.h   |   65 +++++++++++
 entry.c     |   16 ++-
 streaming.c |  100 ++++++++++++++++-
 streaming.h |    2 +-
 6 files changed, 530 insertions(+), 54 deletions(-)
 create mode 100644 convert.h

-- 
1.7.5.2.369.g8fc017

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] convert.h: move declarations for conversion from cache.h
  2011-05-21  6:58 [PATCH 0/4] streaming conversion API Junio C Hamano
@ 2011-05-21  6:58 ` Junio C Hamano
  2011-05-21  6:58 ` [PATCH 2/4] Add streaming filter API Junio C Hamano
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-21  6:58 UTC (permalink / raw)
  To: git

Before adding the streaming filter API to the conversion layer,
move the existing declarations related to the conversion to its
own header file.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h   |   38 +-------------------------------------
 convert.h |   44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 37 deletions(-)
 create mode 100644 convert.h

diff --git a/cache.h b/cache.h
index a5067ba..c781c11 100644
--- a/cache.h
+++ b/cache.h
@@ -6,6 +6,7 @@
 #include "hash.h"
 #include "advice.h"
 #include "gettext.h"
+#include "convert.h"
 
 #include SHA1_HEADER
 #ifndef git_SHA_CTX
@@ -582,35 +583,6 @@ extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_apply_sparse_checkout;
 
-enum safe_crlf {
-	SAFE_CRLF_FALSE = 0,
-	SAFE_CRLF_FAIL = 1,
-	SAFE_CRLF_WARN = 2
-};
-
-extern enum safe_crlf safe_crlf;
-
-enum auto_crlf {
-	AUTO_CRLF_FALSE = 0,
-	AUTO_CRLF_TRUE = 1,
-	AUTO_CRLF_INPUT = -1
-};
-
-extern enum auto_crlf auto_crlf;
-
-enum eol {
-	EOL_UNSET,
-	EOL_CRLF,
-	EOL_LF,
-#ifdef NATIVE_CRLF
-	EOL_NATIVE = EOL_CRLF
-#else
-	EOL_NATIVE = EOL_LF
-#endif
-};
-
-extern enum eol core_eol;
-
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
 	BRANCH_TRACK_NEVER = 0,
@@ -1153,14 +1125,6 @@ extern void trace_strbuf(const char *key, const struct strbuf *buf);
 
 void packet_trace_identity(const char *prog);
 
-/* convert.c */
-/* returns 1 if *dst was used */
-extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum safe_crlf checksafe);
-extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
-extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
-extern int can_bypass_conversion(const char *path);
-
 /* add */
 /*
  * return 0 if success, 1 - if addition of a file failed and
diff --git a/convert.h b/convert.h
new file mode 100644
index 0000000..b1b4a38
--- /dev/null
+++ b/convert.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef CONVERT_H
+#define CONVERT_H
+
+enum safe_crlf {
+	SAFE_CRLF_FALSE = 0,
+	SAFE_CRLF_FAIL = 1,
+	SAFE_CRLF_WARN = 2
+};
+
+extern enum safe_crlf safe_crlf;
+
+enum auto_crlf {
+	AUTO_CRLF_FALSE = 0,
+	AUTO_CRLF_TRUE = 1,
+	AUTO_CRLF_INPUT = -1
+};
+
+extern enum auto_crlf auto_crlf;
+
+enum eol {
+	EOL_UNSET,
+	EOL_CRLF,
+	EOL_LF,
+#ifdef NATIVE_CRLF
+	EOL_NATIVE = EOL_CRLF
+#else
+	EOL_NATIVE = EOL_LF
+#endif
+};
+
+extern enum eol core_eol;
+
+/* returns 1 if *dst was used */
+extern int convert_to_git(const char *path, const char *src, size_t len,
+			  struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_working_tree(const char *path, const char *src,
+				   size_t len, struct strbuf *dst);
+extern int renormalize_buffer(const char *path, const char *src, size_t len,
+			      struct strbuf *dst);
+extern int can_bypass_conversion(const char *path);
+#endif /* CONVERT_H */
-- 
1.7.5.2.369.g8fc017

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] Add streaming filter API
  2011-05-21  6:58 [PATCH 0/4] streaming conversion API Junio C Hamano
  2011-05-21  6:58 ` [PATCH] convert.h: move declarations for conversion from cache.h Junio C Hamano
@ 2011-05-21  6:58 ` Junio C Hamano
  2011-05-21  6:58 ` [PATCH 3/4] Add LF-to-CRLF streaming conversion Junio C Hamano
  2011-05-21  6:58 ` [PATCH 4/4] streaming filter: ident filter and filter cascading Junio C Hamano
  3 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-21  6:58 UTC (permalink / raw)
  To: git

This introduces an API to plug custom filters to an input stream.

The caller gets get_stream_filter("path") to obtain an appropriate
filter for the path, and then uses it when opening an input stream
via open_istream().  After that, the caller can read from the stream
with read_istream(), and close it with close_istream(), just like an
unfiltered stream.

This only adds a "null" filter that is a pass-thru filter, but later
changes can add LF-to-CRLF and other filters, and the callers of the
streaming API do not have to change.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 convert.c   |   84 +++++++++++++++++++++++++++++++++++++++++++++----
 convert.h   |   23 +++++++++++++-
 entry.c     |   16 ++++++---
 streaming.c |  100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 streaming.h |    2 +-
 5 files changed, 209 insertions(+), 16 deletions(-)

diff --git a/convert.c b/convert.c
index 264af1d..1ec91a3 100644
--- a/convert.c
+++ b/convert.c
@@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 	return ret | convert_to_git(path, src, len, dst, 0);
 }
 
+/*****************************************************************
+ *
+ * Streaming converison support
+ *
+ *****************************************************************/
+
+typedef int (*filter_fn)(struct stream_filter *,
+			 const char *input, size_t *isize_p,
+			 char *output, size_t *osize_p);
+typedef void (*free_fn)(struct stream_filter *);
+
+struct stream_filter_vtbl {
+	filter_fn filter;
+	free_fn free;
+};
+
+struct stream_filter {
+	struct stream_filter_vtbl *vtbl;
+};
+
+static int null_filter_fn(struct stream_filter *filter,
+			  const char *input, size_t *isize_p,
+			  char *output, size_t *osize_p)
+{
+	size_t count = *isize_p;
+	if (*osize_p < count)
+		count = *osize_p;
+	if (count) {
+		memmove(output, input, count);
+		*isize_p -= count;
+		*osize_p -= count;
+	}
+	return 0;
+}
+
+static void null_free_fn(struct stream_filter *filter)
+{
+	; /* nothing -- null instances are shared */
+}
+
+static struct stream_filter_vtbl null_vtbl = {
+	null_filter_fn,
+	null_free_fn,
+};
+
+static struct stream_filter null_filter_singleton = {
+	&null_vtbl,
+};
+
+int is_null_stream_filter(struct stream_filter *filter)
+{
+	return filter == &null_filter_singleton;
+}
+
 /*
- * You would be crazy to set CRLF, smuge/clean or ident to
- * a large binary blob you would want us not to slurp into
- * the memory!
+ * Return an appropriately constructed filter for the path, or NULL if
+ * the contents cannot be filtered without reading the whole thing
+ * in-core.
+ *
+ * Note that you would be crazy to set CRLF, smuge/clean or ident to a
+ * large binary blob you would want us not to slurp into the memory!
  */
-int can_bypass_conversion(const char *path)
+struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1)
 {
 	struct conv_attrs ca;
 	enum crlf_action crlf_action;
@@ -828,11 +885,24 @@ int can_bypass_conversion(const char *path)
 
 	if (ca.ident ||
 	    (ca.drv && (ca.drv->smudge || ca.drv->clean)))
-		return 0;
+		return NULL;
 
 	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
 	if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
 	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
-		return 1;
-	return 0;
+		return &null_filter_singleton;
+
+	return NULL;
+}
+
+void free_stream_filter(struct stream_filter *filter)
+{
+	filter->vtbl->free(filter);
+}
+
+int stream_filter(struct stream_filter *filter,
+		  const char *input, size_t *isize_p,
+		  char *output, size_t *osize_p)
+{
+	return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
 }
diff --git a/convert.h b/convert.h
index d61c111..c9906f9 100644
--- a/convert.h
+++ b/convert.h
@@ -37,5 +37,26 @@ extern int convert_to_working_tree(const char *path, const char *src,
 				   size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len,
 			      struct strbuf *dst);
-extern int can_bypass_conversion(const char *path);
+
+/*****************************************************************
+ *
+ * Streaming converison support
+ *
+ *****************************************************************/
+
+struct stream_filter; /* opaque */
+
+extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *);
+extern void free_stream_filter(struct stream_filter *);
+extern int is_null_stream_filter(struct stream_filter *);
+
+/*
+ * Use as much input up to *isize_p and fill output up to *osize_p;
+ * update isize_p and osize_p to indicate how much buffer space was
+ * consumed and filled. Return 0 on success, non-zero on error.
+ */
+extern int stream_filter(struct stream_filter *,
+			 const char *input, size_t *isize_p,
+			 char *output, size_t *osize_p);
+
 #endif /* CONVERT_H */
diff --git a/entry.c b/entry.c
index e2dc16c..852fea1 100644
--- a/entry.c
+++ b/entry.c
@@ -116,6 +116,7 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st)
 }
 
 static int streaming_write_entry(struct cache_entry *ce, char *path,
+				 struct stream_filter *filter,
 				 const struct checkout *state, int to_tempfile,
 				 int *fstat_done, struct stat *statbuf)
 {
@@ -126,7 +127,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
 	ssize_t kept = 0;
 	int fd = -1;
 
-	st = open_istream(ce->sha1, &type, &sz);
+	st = open_istream(ce->sha1, &type, &sz, filter);
 	if (!st)
 		return -1;
 	if (type != OBJ_BLOB)
@@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 	size_t wrote, newsize = 0;
 	struct stat st;
 
-	if ((ce_mode_s_ifmt == S_IFREG) &&
-	    can_bypass_conversion(path) &&
-	    !streaming_write_entry(ce, path, state, to_tempfile,
-				   &fstat_done, &st))
-		goto finish;
+	if (ce_mode_s_ifmt == S_IFREG) {
+		struct stream_filter *filter = get_stream_filter(path, ce->sha1);
+		if (filter &&
+		    !streaming_write_entry(ce, path, filter,
+					   state, to_tempfile,
+					   &fstat_done, &st))
+			goto finish;
+	}
 
 	switch (ce_mode_s_ifmt) {
 	case S_IFREG:
diff --git a/streaming.c b/streaming.c
index 0602926..565f000 100644
--- a/streaming.c
+++ b/streaming.c
@@ -41,6 +41,9 @@ struct stream_vtbl {
 static open_method_decl(incore);
 static open_method_decl(loose);
 static open_method_decl(pack_non_delta);
+static struct git_istream *attach_stream_filter(struct git_istream *st,
+						struct stream_filter *filter);
+
 
 static open_istream_fn open_istream_tbl[] = {
 	open_istream_incore,
@@ -48,6 +51,17 @@ static open_istream_fn open_istream_tbl[] = {
 	open_istream_pack_non_delta,
 };
 
+#define FILTER_BUFFER (1024*16)
+
+struct filtered_istream {
+	struct git_istream *upstream;
+	struct stream_filter *filter;
+	char ibuf[FILTER_BUFFER];
+	char obuf[FILTER_BUFFER];
+	int i_end, i_ptr;
+	int o_end, o_ptr;
+};
+
 struct git_istream {
 	const struct stream_vtbl *vtbl;
 	unsigned long size; /* inflated size of full object */
@@ -72,6 +86,8 @@ struct git_istream {
 			struct packed_git *pack;
 			off_t pos;
 		} in_pack;
+
+		struct filtered_istream filtered;
 	} u;
 };
 
@@ -112,7 +128,8 @@ static enum input_source istream_source(const unsigned char *sha1,
 
 struct git_istream *open_istream(const unsigned char *sha1,
 				 enum object_type *type,
-				 unsigned long *size)
+				 unsigned long *size,
+				 struct stream_filter *filter)
 {
 	struct git_istream *st;
 	struct object_info oi;
@@ -129,6 +146,14 @@ struct git_istream *open_istream(const unsigned char *sha1,
 			return NULL;
 		}
 	}
+	if (st && filter) {
+		/* Add "&& !is_null_stream_filter(filter)" for performance */
+		struct git_istream *nst = attach_stream_filter(st, filter);
+		if (!nst)
+			close_istream(st);
+		st = nst;
+	}
+
 	*size = st->size;
 	return st;
 }
@@ -149,6 +174,79 @@ static void close_deflated_stream(struct git_istream *st)
 
 /*****************************************************************
  *
+ * Filtered stream
+ *
+ *****************************************************************/
+
+static close_method_decl(filtered)
+{
+	free_stream_filter(st->u.filtered.filter);
+	return close_istream(st->u.filtered.upstream);
+}
+
+static read_method_decl(filtered)
+{
+	struct filtered_istream *fs = &(st->u.filtered);
+	size_t filled = 0;
+
+	while (sz) {
+		/* do we already have filtered output? */
+		if (fs->o_ptr < fs->o_end) {
+			size_t to_move = fs->o_end - fs->o_ptr;
+			if (sz < to_move)
+				to_move = sz;
+			memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
+			fs->o_ptr += to_move;
+			sz -= to_move;
+			filled += to_move;
+			continue;
+		}
+		fs->o_end = fs->o_ptr = 0;
+
+		/* do we have anything to feed the filter with? */
+		if (fs->i_ptr < fs->i_end) {
+			size_t to_feed = fs->i_end - fs->i_ptr;
+			size_t to_receive = FILTER_BUFFER;
+			if (stream_filter(fs->filter,
+					  fs->ibuf + fs->i_ptr, &to_feed,
+					  fs->obuf, &to_receive))
+				return -1;
+			fs->i_ptr = fs->i_end - to_feed;
+			fs->o_end = FILTER_BUFFER - to_receive;
+			continue;
+		}
+		fs->i_end = fs->i_ptr = 0;
+
+		/* refill the input from the upstream */
+		fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
+		if (fs->i_end <= 0)
+			break;
+	}
+	return filled;
+}
+
+static struct stream_vtbl filtered_vtbl = {
+	close_istream_filtered,
+	read_istream_filtered,
+};
+
+static struct git_istream *attach_stream_filter(struct git_istream *st,
+						struct stream_filter *filter)
+{
+	struct git_istream *ifs = xmalloc(sizeof(*ifs));
+	struct filtered_istream *fs = &(ifs->u.filtered);
+
+	ifs->vtbl = &filtered_vtbl;
+	fs->upstream = st;
+	fs->filter = filter;
+	fs->i_end = fs->i_ptr = 0;
+	fs->o_end = fs->o_ptr = 0;
+	ifs->size = -1; /* unknown */
+	return ifs;
+}
+
+/*****************************************************************
+ *
  * Loose object stream
  *
  *****************************************************************/
diff --git a/streaming.h b/streaming.h
index 18cbe68..589e857 100644
--- a/streaming.h
+++ b/streaming.h
@@ -8,7 +8,7 @@
 /* opaque */
 struct git_istream;
 
-extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
+extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *);
 extern int close_istream(struct git_istream *);
 extern ssize_t read_istream(struct git_istream *, char *, size_t);
 
-- 
1.7.5.2.369.g8fc017

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] Add LF-to-CRLF streaming conversion
  2011-05-21  6:58 [PATCH 0/4] streaming conversion API Junio C Hamano
  2011-05-21  6:58 ` [PATCH] convert.h: move declarations for conversion from cache.h Junio C Hamano
  2011-05-21  6:58 ` [PATCH 2/4] Add streaming filter API Junio C Hamano
@ 2011-05-21  6:58 ` Junio C Hamano
  2011-05-21  6:58 ` [PATCH 4/4] streaming filter: ident filter and filter cascading Junio C Hamano
  3 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-21  6:58 UTC (permalink / raw)
  To: git

If we do not have to guess or validate by scanning the input, we can
just stream this through.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 convert.c |   37 +++++++++++++++++++++++++++++++++++++
 1 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/convert.c b/convert.c
index 1ec91a3..82fb75b 100644
--- a/convert.c
+++ b/convert.c
@@ -868,6 +868,39 @@ int is_null_stream_filter(struct stream_filter *filter)
 	return filter == &null_filter_singleton;
 }
 
+static int lf_to_crlf_filter_fn(struct stream_filter *filter,
+				const char *input, size_t *isize_p,
+				char *output, size_t *osize_p)
+{
+	size_t count = *isize_p;
+	if (count) {
+		size_t i, o;
+		for (i = o = 0; o < *osize_p && i < count; i++) {
+			char ch = input[i];
+			if (ch == '\n') {
+				if (o + 1 < *osize_p)
+					output[o++] = '\r';
+				else
+					break;
+			}
+			output[o++] = ch;
+		}
+
+		*osize_p -= o;
+		*isize_p -= i;
+	}
+	return 0;
+}
+
+static struct stream_filter_vtbl lf_to_crlf_vtbl = {
+	lf_to_crlf_filter_fn,
+	null_free_fn,
+};
+
+static struct stream_filter lf_to_crlf_filter_singleton = {
+	&lf_to_crlf_vtbl,
+};
+
 /*
  * Return an appropriately constructed filter for the path, or NULL if
  * the contents cannot be filtered without reading the whole thing
@@ -892,6 +925,10 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
 	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
 		return &null_filter_singleton;
 
+	if (output_eol(crlf_action) == EOL_CRLF &&
+	    !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
+		return &lf_to_crlf_filter_singleton;
+
 	return NULL;
 }
 
-- 
1.7.5.2.369.g8fc017

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21  6:58 [PATCH 0/4] streaming conversion API Junio C Hamano
                   ` (2 preceding siblings ...)
  2011-05-21  6:58 ` [PATCH 3/4] Add LF-to-CRLF streaming conversion Junio C Hamano
@ 2011-05-21  6:58 ` Junio C Hamano
  2011-05-21 21:05   ` René Scharfe
  3 siblings, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2011-05-21  6:58 UTC (permalink / raw)
  To: git

Add support for "ident" filter on the output codepath. Because we now have
more than one filter active, also add support to cascade them together.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * There is one known issue in this "ident" filter implementation. If the
   input data ends while the filter is still in the "SKIPPING" state, it
   should go to the "DRAINING" codepath using the same logic that triggers
   when it gets to the end of line, copying out what was already queued.
   However, the API does not offer an explicit way for the input side to
   say "that's all". We probably need to make a zero-byte input to the
   filter function as an explicit EOF signal or something---currently I
   think it will loop forever, but it is getting late, so I will not be
   further touching this code tonight.

 convert.c |  254 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 246 insertions(+), 8 deletions(-)

diff --git a/convert.c b/convert.c
index 82fb75b..6b90c4d 100644
--- a/convert.c
+++ b/convert.c
@@ -868,6 +868,10 @@ int is_null_stream_filter(struct stream_filter *filter)
 	return filter == &null_filter_singleton;
 }
 
+
+/*
+ * LF-to-CRLF filter
+ */
 static int lf_to_crlf_filter_fn(struct stream_filter *filter,
 				const char *input, size_t *isize_p,
 				char *output, size_t *osize_p)
@@ -901,6 +905,237 @@ static struct stream_filter lf_to_crlf_filter_singleton = {
 	&lf_to_crlf_vtbl,
 };
 
+
+/*
+ * Cascade filter
+ */
+#define FILTER_BUFFER 1024
+struct cascade_filter {
+	struct stream_filter filter;
+	struct stream_filter *one;
+	struct stream_filter *two;
+	char ibuf[FILTER_BUFFER];
+	char obuf[FILTER_BUFFER];
+	int i_end, i_ptr;
+	int o_end, o_ptr;
+};
+
+static int cascade_filter_fn(struct stream_filter *filter,
+			     const char *input, size_t *isize_p,
+			     char *output, size_t *osize_p)
+{
+	struct cascade_filter *cas = (struct cascade_filter *) filter;
+	size_t filled = 0;
+	size_t sz = *osize_p;
+	size_t to_feed, remaining;
+
+	/*
+	 * input --> ibuf --(one)--> obuf --(two)--> output
+	 */
+	while (filled < sz) {
+		/* do we already have output to feed two? */
+		if (cas->o_ptr < cas->o_end) {
+			to_feed = cas->o_end - cas->o_ptr;
+			remaining = sz - filled;
+			if (stream_filter(cas->two,
+					  cas->obuf + cas->o_ptr, &to_feed,
+					  output + filled, &remaining))
+				return -1;
+			cas->o_ptr += (cas->o_end - cas->o_ptr) - to_feed;
+			filled = sz - remaining;
+			continue;
+		}
+		cas->o_end = cas->o_ptr = 0;
+
+		/* do we have anything to feed one? */
+		if (cas->i_ptr < cas->i_end) {
+			to_feed = cas->i_end - cas->i_ptr;
+			remaining = sizeof(cas->obuf);
+			if (stream_filter(cas->one,
+					  cas->ibuf + cas->i_ptr, &to_feed,
+					  cas->obuf, &remaining))
+				return -1;
+			cas->i_ptr += (cas->i_end - cas->i_ptr) - to_feed;
+			cas->o_end = sizeof(cas->obuf) - remaining;
+			continue;
+		}
+		cas->i_end = cas->i_ptr = 0;
+
+		/* read from our upstream */
+		to_feed = *isize_p;
+		if (sizeof(cas->ibuf) < to_feed)
+			to_feed = sizeof(cas->ibuf);
+		if (!to_feed)
+			break;
+		memcpy(cas->ibuf, input, to_feed);
+		input += to_feed;
+		*isize_p -= to_feed;
+		cas->i_end = to_feed;
+	}
+	*osize_p -= filled;
+	return 0;
+}
+
+static void cascade_free_fn(struct stream_filter *filter)
+{
+	struct cascade_filter *cas = (struct cascade_filter *)filter;
+	free_stream_filter(cas->one);
+	free_stream_filter(cas->two);
+	free(filter);
+}
+
+static struct stream_filter_vtbl cascade_vtbl = {
+	cascade_filter_fn,
+	cascade_free_fn,
+};
+
+static struct stream_filter *cascade_filter(struct stream_filter *one,
+					    struct stream_filter *two)
+{
+	struct cascade_filter *cascade;
+
+	if (!one || is_null_stream_filter(one))
+		return two;
+	if (!two || is_null_stream_filter(two))
+		return one;
+
+	cascade = xmalloc(sizeof(*cascade));
+	cascade->one = one;
+	cascade->two = two;
+	cascade->i_end = cascade->i_ptr = 0;
+	cascade->o_end = cascade->o_ptr = 0;
+	cascade->filter.vtbl = &cascade_vtbl;
+	return (struct stream_filter *)cascade;
+}
+
+/*
+ * ident filter
+ */
+#define IDENT_DRAINING (-1)
+#define IDENT_SKIPPING (-2)
+struct ident_filter {
+	struct stream_filter filter;
+	struct strbuf left;
+	int state;
+	char ident[45]; /* ": x40 $" */
+};
+
+static int is_foreign_ident(const char *str)
+{
+	int i;
+
+	if (prefixcmp(str, "$Id: "))
+		return 0;
+	for (i = 5; str[i]; i++) {
+		if (isspace(str[i]) && str[i+1] != '$')
+			return 1;
+	}
+	return 0;
+}
+
+static int ident_filter_fn(struct stream_filter *filter,
+			   const char *input, size_t *isize_p,
+			   char *output, size_t *osize_p)
+{
+	struct ident_filter *ident = (struct ident_filter *)filter;
+	static const char head[] = "$Id";
+
+	while (*isize_p || (ident->state == IDENT_DRAINING)) {
+		int ch;
+
+		if (ident->state == IDENT_DRAINING) {
+			/* Draining what is in left */
+			size_t to_drain = ident->left.len;
+
+			if (!to_drain) {
+				ident->state = 0;
+				continue;
+			}
+			if (!*osize_p)
+				break;
+			if (*osize_p < to_drain)
+				to_drain = *osize_p;
+			memcpy(output, ident->left.buf, to_drain);
+			strbuf_remove(&ident->left, 0, to_drain);
+			output += to_drain;
+			(*osize_p) -= to_drain;
+			continue;
+		}
+
+		ch = *(input++);
+		(*isize_p)--;
+
+		if (ident->state == IDENT_SKIPPING) {
+			/*
+			 * Skipping until '$' or LF, but keeping them
+			 * in case it is a foreign ident.
+			 */
+			strbuf_addch(&ident->left, ch);
+			if (ch != '\n' && ch != '$')
+				continue;
+			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
+				strbuf_reset(&ident->left);
+				strbuf_addstr(&ident->left, head);
+				strbuf_addstr(&ident->left, ident->ident);
+			}
+			ident->state = IDENT_DRAINING;
+			continue;
+		}
+
+		if (ident->state < sizeof(head) &&
+		    head[ident->state] == ch) {
+			ident->state++;
+			continue;
+		}
+
+		if (ident->state)
+			strbuf_add(&ident->left, head, ident->state);
+		if (ident->state == sizeof(head) - 1) {
+			if (ch != ':' && ch != '$') {
+				strbuf_addch(&ident->left, ch);
+				ident->state = 0;
+				continue;
+			}
+
+			if (ch == ':') {
+				strbuf_addch(&ident->left, ch);
+				ident->state = IDENT_SKIPPING;
+			} else {
+				strbuf_addstr(&ident->left, ident->ident);
+				ident->state = IDENT_DRAINING;
+			}
+			continue;
+		}
+
+		strbuf_addch(&ident->left, ch);
+		ident->state = IDENT_DRAINING;
+	}
+	return 0;
+}
+
+static void ident_free_fn(struct stream_filter *filter)
+{
+	struct ident_filter *ident = (struct ident_filter *)filter;
+	strbuf_release(&ident->left);
+	free(filter);
+}
+
+static struct stream_filter_vtbl ident_vtbl = {
+	ident_filter_fn,
+	ident_free_fn,
+};
+
+static struct stream_filter *ident_filter(const unsigned char *sha1)
+{
+	struct ident_filter *ident = xmalloc(sizeof(*ident));
+
+	sprintf(ident->ident, ": %s $", sha1_to_hex(sha1));
+	strbuf_init(&ident->left, 0);
+	ident->filter.vtbl = &ident_vtbl;
+	ident->state = 0;
+	return (struct stream_filter *)ident;
+}
+
 /*
  * Return an appropriately constructed filter for the path, or NULL if
  * the contents cannot be filtered without reading the whole thing
@@ -913,23 +1148,26 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
 {
 	struct conv_attrs ca;
 	enum crlf_action crlf_action;
+	struct stream_filter *filter = NULL;
 
 	convert_attrs(&ca, path);
 
-	if (ca.ident ||
-	    (ca.drv && (ca.drv->smudge || ca.drv->clean)))
-		return NULL;
+	if (ca.drv && (ca.drv->smudge || ca.drv->clean))
+		return filter;
+
+	if (ca.ident)
+		filter = ident_filter(sha1);
 
 	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
 	if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
 	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
-		return &null_filter_singleton;
+		filter = cascade_filter(filter, &null_filter_singleton);
 
-	if (output_eol(crlf_action) == EOL_CRLF &&
-	    !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
-		return &lf_to_crlf_filter_singleton;
+	else if (output_eol(crlf_action) == EOL_CRLF &&
+		 !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
+		filter = cascade_filter(filter, &lf_to_crlf_filter_singleton);
 
-	return NULL;
+	return filter;
 }
 
 void free_stream_filter(struct stream_filter *filter)
-- 
1.7.5.2.369.g8fc017

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21  6:58 ` [PATCH 4/4] streaming filter: ident filter and filter cascading Junio C Hamano
@ 2011-05-21 21:05   ` René Scharfe
  2011-05-21 21:25     ` René Scharfe
  2011-05-21 22:08     ` [PATCH 4/4] streaming filter: ident filter and " René Scharfe
  0 siblings, 2 replies; 12+ messages in thread
From: René Scharfe @ 2011-05-21 21:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Am 21.05.2011 08:58, schrieb Junio C Hamano:
> Add support for "ident" filter on the output codepath. Because we now have
> more than one filter active, also add support to cascade them together.
> 
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
> 
>  * There is one known issue in this "ident" filter implementation. If the
>    input data ends while the filter is still in the "SKIPPING" state, it
>    should go to the "DRAINING" codepath using the same logic that triggers
>    when it gets to the end of line, copying out what was already queued.
>    However, the API does not offer an explicit way for the input side to
>    say "that's all". We probably need to make a zero-byte input to the
>    filter function as an explicit EOF signal or something---currently I
>    think it will loop forever, but it is getting late, so I will not be
>    further touching this code tonight.

AFAIU it will leave skipping mode once there is no input data anymore
instead of looping forever.  The accumulated stuff will be discarded.

The test suite is passed, by the way, even though t0021 seems to contain
a test for this issue (NoTerminatingSymbolAtEOF).  It uses echo to
create the files so there will be a terminating newline before the end
of the file, though, so this subtest probably needs to be changed.

> 
>  convert.c |  254 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 246 insertions(+), 8 deletions(-)
> 

Perhaps split off the cascading part into its own patch?

> diff --git a/convert.c b/convert.c
> index 82fb75b..6b90c4d 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -868,6 +868,10 @@ int is_null_stream_filter(struct stream_filter *filter)
>  	return filter == &null_filter_singleton;
>  }
>  
> +
> +/*
> + * LF-to-CRLF filter
> + */
>  static int lf_to_crlf_filter_fn(struct stream_filter *filter,
>  				const char *input, size_t *isize_p,
>  				char *output, size_t *osize_p)
> @@ -901,6 +905,237 @@ static struct stream_filter lf_to_crlf_filter_singleton = {
>  	&lf_to_crlf_vtbl,
>  };
>  
> +
> +/*
> + * Cascade filter
> + */
> +#define FILTER_BUFFER 1024
> +struct cascade_filter {
> +	struct stream_filter filter;
> +	struct stream_filter *one;
> +	struct stream_filter *two;
> +	char ibuf[FILTER_BUFFER];
> +	char obuf[FILTER_BUFFER];
> +	int i_end, i_ptr;
> +	int o_end, o_ptr;
> +};
> +
> +static int cascade_filter_fn(struct stream_filter *filter,
> +			     const char *input, size_t *isize_p,
> +			     char *output, size_t *osize_p)
> +{
> +	struct cascade_filter *cas = (struct cascade_filter *) filter;
> +	size_t filled = 0;
> +	size_t sz = *osize_p;
> +	size_t to_feed, remaining;
> +
> +	/*
> +	 * input --> ibuf --(one)--> obuf --(two)--> output
> +	 */
> +	while (filled < sz) {
> +		/* do we already have output to feed two? */
> +		if (cas->o_ptr < cas->o_end) {
> +			to_feed = cas->o_end - cas->o_ptr;
> +			remaining = sz - filled;
> +			if (stream_filter(cas->two,
> +					  cas->obuf + cas->o_ptr, &to_feed,
> +					  output + filled, &remaining))
> +				return -1;
> +			cas->o_ptr += (cas->o_end - cas->o_ptr) - to_feed;
> +			filled = sz - remaining;
> +			continue;
> +		}
> +		cas->o_end = cas->o_ptr = 0;
> +
> +		/* do we have anything to feed one? */
> +		if (cas->i_ptr < cas->i_end) {
> +			to_feed = cas->i_end - cas->i_ptr;
> +			remaining = sizeof(cas->obuf);
> +			if (stream_filter(cas->one,
> +					  cas->ibuf + cas->i_ptr, &to_feed,
> +					  cas->obuf, &remaining))
> +				return -1;
> +			cas->i_ptr += (cas->i_end - cas->i_ptr) - to_feed;
> +			cas->o_end = sizeof(cas->obuf) - remaining;
> +			continue;
> +		}
> +		cas->i_end = cas->i_ptr = 0;
> +
> +		/* read from our upstream */
> +		to_feed = *isize_p;
> +		if (sizeof(cas->ibuf) < to_feed)
> +			to_feed = sizeof(cas->ibuf);
> +		if (!to_feed)
> +			break;
> +		memcpy(cas->ibuf, input, to_feed);
> +		input += to_feed;
> +		*isize_p -= to_feed;
> +		cas->i_end = to_feed;
> +	}
> +	*osize_p -= filled;
> +	return 0;
> +}
> +
> +static void cascade_free_fn(struct stream_filter *filter)
> +{
> +	struct cascade_filter *cas = (struct cascade_filter *)filter;
> +	free_stream_filter(cas->one);
> +	free_stream_filter(cas->two);
> +	free(filter);
> +}
> +
> +static struct stream_filter_vtbl cascade_vtbl = {
> +	cascade_filter_fn,
> +	cascade_free_fn,
> +};
> +
> +static struct stream_filter *cascade_filter(struct stream_filter *one,
> +					    struct stream_filter *two)
> +{
> +	struct cascade_filter *cascade;
> +
> +	if (!one || is_null_stream_filter(one))
> +		return two;
> +	if (!two || is_null_stream_filter(two))
> +		return one;

The is_null_stream_filter() check could be left out initially.  This
would allow for testing the cascading part with null filters.

> +
> +	cascade = xmalloc(sizeof(*cascade));
> +	cascade->one = one;
> +	cascade->two = two;
> +	cascade->i_end = cascade->i_ptr = 0;
> +	cascade->o_end = cascade->o_ptr = 0;
> +	cascade->filter.vtbl = &cascade_vtbl;
> +	return (struct stream_filter *)cascade;
> +}
> +
> +/*
> + * ident filter
> + */
> +#define IDENT_DRAINING (-1)
> +#define IDENT_SKIPPING (-2)
> +struct ident_filter {
> +	struct stream_filter filter;
> +	struct strbuf left;
> +	int state;
> +	char ident[45]; /* ": x40 $" */
> +};
> +
> +static int is_foreign_ident(const char *str)
> +{
> +	int i;
> +
> +	if (prefixcmp(str, "$Id: "))
> +		return 0;
> +	for (i = 5; str[i]; i++) {
> +		if (isspace(str[i]) && str[i+1] != '$')
> +			return 1;
> +	}
> +	return 0;
> +}
> +
> +static int ident_filter_fn(struct stream_filter *filter,
> +			   const char *input, size_t *isize_p,
> +			   char *output, size_t *osize_p)
> +{
> +	struct ident_filter *ident = (struct ident_filter *)filter;
> +	static const char head[] = "$Id";
> +
> +	while (*isize_p || (ident->state == IDENT_DRAINING)) {
> +		int ch;
> +
> +		if (ident->state == IDENT_DRAINING) {
> +			/* Draining what is in left */
> +			size_t to_drain = ident->left.len;
> +
> +			if (!to_drain) {
> +				ident->state = 0;
> +				continue;
> +			}
> +			if (!*osize_p)
> +				break;
> +			if (*osize_p < to_drain)
> +				to_drain = *osize_p;
> +			memcpy(output, ident->left.buf, to_drain);
> +			strbuf_remove(&ident->left, 0, to_drain);
> +			output += to_drain;
> +			(*osize_p) -= to_drain;
> +			continue;
> +		}
> +
> +		ch = *(input++);
> +		(*isize_p)--;
> +
> +		if (ident->state == IDENT_SKIPPING) {
> +			/*
> +			 * Skipping until '$' or LF, but keeping them
> +			 * in case it is a foreign ident.
> +			 */
> +			strbuf_addch(&ident->left, ch);
> +			if (ch != '\n' && ch != '$')
> +				continue;
> +			if (ch == '$' && !is_foreign_ident(ident->left.buf)) {
> +				strbuf_reset(&ident->left);
> +				strbuf_addstr(&ident->left, head);
> +				strbuf_addstr(&ident->left, ident->ident);
> +			}
> +			ident->state = IDENT_DRAINING;
> +			continue;
> +		}
> +
> +		if (ident->state < sizeof(head) &&

		// minus one because otherwise we'd compare the
		// terminating NUL as well even though we're not
		// actually looking for a NUL
		if (ident->state < sizeof(head) - 1 &&

> +		    head[ident->state] == ch) {
> +			ident->state++;
> +			continue;
> +		}
> +
> +		if (ident->state)
> +			strbuf_add(&ident->left, head, ident->state);
> +		if (ident->state == sizeof(head) - 1) {
> +			if (ch != ':' && ch != '$') {
> +				strbuf_addch(&ident->left, ch);
> +				ident->state = 0;

				// clean the plate right here and now;
				// can be combined with the case at the
				// end of the loop
				ident->state = IDENT_DRAINING;

> +				continue;
> +			}
> +
> +			if (ch == ':') {
> +				strbuf_addch(&ident->left, ch);
> +				ident->state = IDENT_SKIPPING;
> +			} else {
> +				strbuf_addstr(&ident->left, ident->ident);
> +				ident->state = IDENT_DRAINING;
> +			}
> +			continue;
> +		}
> +
> +		strbuf_addch(&ident->left, ch);
> +		ident->state = IDENT_DRAINING;
> +	}
> +	return 0;
> +}
> +
> +static void ident_free_fn(struct stream_filter *filter)
> +{
> +	struct ident_filter *ident = (struct ident_filter *)filter;
> +	strbuf_release(&ident->left);
> +	free(filter);
> +}
> +
> +static struct stream_filter_vtbl ident_vtbl = {
> +	ident_filter_fn,
> +	ident_free_fn,
> +};
> +
> +static struct stream_filter *ident_filter(const unsigned char *sha1)
> +{
> +	struct ident_filter *ident = xmalloc(sizeof(*ident));
> +
> +	sprintf(ident->ident, ": %s $", sha1_to_hex(sha1));
> +	strbuf_init(&ident->left, 0);
> +	ident->filter.vtbl = &ident_vtbl;
> +	ident->state = 0;
> +	return (struct stream_filter *)ident;
> +}
> +
>  /*
>   * Return an appropriately constructed filter for the path, or NULL if
>   * the contents cannot be filtered without reading the whole thing
> @@ -913,23 +1148,26 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
>  {
>  	struct conv_attrs ca;
>  	enum crlf_action crlf_action;
> +	struct stream_filter *filter = NULL;
>  
>  	convert_attrs(&ca, path);
>  
> -	if (ca.ident ||
> -	    (ca.drv && (ca.drv->smudge || ca.drv->clean)))
> -		return NULL;
> +	if (ca.drv && (ca.drv->smudge || ca.drv->clean))
> +		return filter;
> +
> +	if (ca.ident)
> +		filter = ident_filter(sha1);
>  
>  	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
>  	if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
>  	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
> -		return &null_filter_singleton;
> +		filter = cascade_filter(filter, &null_filter_singleton);
>  
> -	if (output_eol(crlf_action) == EOL_CRLF &&
> -	    !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
> -		return &lf_to_crlf_filter_singleton;
> +	else if (output_eol(crlf_action) == EOL_CRLF &&
> +		 !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
> +		filter = cascade_filter(filter, &lf_to_crlf_filter_singleton);
>  
> -	return NULL;
> +	return filter;
>  }
>  
>  void free_stream_filter(struct stream_filter *filter)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21 21:05   ` René Scharfe
@ 2011-05-21 21:25     ` René Scharfe
  2011-05-25  1:06       ` Junio C Hamano
  2011-05-21 22:08     ` [PATCH 4/4] streaming filter: ident filter and " René Scharfe
  1 sibling, 1 reply; 12+ messages in thread
From: René Scharfe @ 2011-05-21 21:25 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Am 21.05.2011 23:05, schrieb René Scharfe:
> Am 21.05.2011 08:58, schrieb Junio C Hamano:
>> Add support for "ident" filter on the output codepath. Because we now have
>> more than one filter active, also add support to cascade them together.
>>
>> Signed-off-by: Junio C Hamano <gitster@pobox.com>
>> ---
>>
>>  * There is one known issue in this "ident" filter implementation. If the
>>    input data ends while the filter is still in the "SKIPPING" state, it
>>    should go to the "DRAINING" codepath using the same logic that triggers
>>    when it gets to the end of line, copying out what was already queued.
>>    However, the API does not offer an explicit way for the input side to
>>    say "that's all". We probably need to make a zero-byte input to the
>>    filter function as an explicit EOF signal or something---currently I
>>    think it will loop forever, but it is getting late, so I will not be
>>    further touching this code tonight.
> 
> AFAIU it will leave skipping mode once there is no input data anymore
> instead of looping forever.  The accumulated stuff will be discarded.
> 
> The test suite is passed, by the way, even though t0021 seems to contain
> a test for this issue (NoTerminatingSymbolAtEOF).  It uses echo to
> create the files so there will be a terminating newline before the end
> of the file, though, so this subtest probably needs to be changed.

-- >8 --
Subject: t0021-conversion.sh: fix NoTerminatingSymbolAtEOF test

The last line of the test file "expanded-keywords" ended in a newline,
which is a valid terminator for ident.  Use printf instead of echo to omit
it and thus really test if a file that ends unexpectedly in the middle of
an ident tag is handled properly.

Also take the oppertunity to calculate the expected ID dynamically
instead of hardcoding it into the test script.  This should make future
changes easier.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 t/t0021-conversion.sh |   23 ++++++++++++-----------
 1 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 9078b84..275421e 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -66,25 +66,26 @@ test_expect_success expanded_in_repo '
 		echo "\$Id:NoSpaceAtEitherEnd\$"
 		echo "\$Id: NoTerminatingSymbol"
 		echo "\$Id: Foreign Commit With Spaces \$"
-		echo "\$Id: NoTerminatingSymbolAtEOF"
+		printf "\$Id: NoTerminatingSymbolAtEOF"
 	} > expanded-keywords &&
 
+	git add expanded-keywords &&
+	git commit -m "File with keywords expanded" &&
+	id=$(git rev-parse --verify :expanded-keywords) &&
+
 	{
 		echo "File with expanded keywords"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
-		echo "\$Id: fd0478f5f1486f3d5177d4c3f6eb2765e8fc56b9 \$"
+		echo "\$Id: $id \$"
+		echo "\$Id: $id \$"
+		echo "\$Id: $id \$"
+		echo "\$Id: $id \$"
+		echo "\$Id: $id \$"
+		echo "\$Id: $id \$"
 		echo "\$Id: NoTerminatingSymbol"
 		echo "\$Id: Foreign Commit With Spaces \$"
-		echo "\$Id: NoTerminatingSymbolAtEOF"
+		printf "\$Id: NoTerminatingSymbolAtEOF"
 	} > expected-output &&
 
-	git add expanded-keywords &&
-	git commit -m "File with keywords expanded" &&
-
 	echo "expanded-keywords ident" >> .gitattributes &&
 
 	rm -f expanded-keywords &&
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21 21:05   ` René Scharfe
  2011-05-21 21:25     ` René Scharfe
@ 2011-05-21 22:08     ` René Scharfe
  2011-05-22  1:00       ` Junio C Hamano
  1 sibling, 1 reply; 12+ messages in thread
From: René Scharfe @ 2011-05-21 22:08 UTC (permalink / raw)
  Cc: Junio C Hamano, git

Am 21.05.2011 23:05, schrieb René Scharfe:
>> +		if (ident->state < sizeof(head) &&
> 
> 		// minus one because otherwise we'd compare the
> 		// terminating NUL as well even though we're not
> 		// actually looking for a NUL
> 		if (ident->state < sizeof(head) - 1 &&

Possibly, but that doesn't matter, as the right number of characters is
remembered and the second test below is not passed if we sailed past the
NUL.  Sorry for the noise.

> 
>> +		    head[ident->state] == ch) {
>> +			ident->state++;
>> +			continue;
>> +		}
>> +
>> +		if (ident->state)
>> +			strbuf_add(&ident->left, head, ident->state);
>> +		if (ident->state == sizeof(head) - 1) {

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21 22:08     ` [PATCH 4/4] streaming filter: ident filter and " René Scharfe
@ 2011-05-22  1:00       ` Junio C Hamano
  0 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-22  1:00 UTC (permalink / raw)
  To: René Scharfe; +Cc: git

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> Am 21.05.2011 23:05, schrieb René Scharfe:
>>> +		if (ident->state < sizeof(head) &&
>> 
>> 		// minus one because otherwise we'd compare the
>> 		// terminating NUL as well even though we're not
>> 		// actually looking for a NUL
>> 		if (ident->state < sizeof(head) - 1 &&
>
> Possibly, but that doesn't matter, as the right number of characters is
> remembered and the second test below is not passed if we sailed past the
> NUL.  Sorry for the noise.

Thanks for a careful reading.

In the second re-roll, I'll split the cascade and ident into two patches
(ident comes first, with a fake cascade that returns NULL to indicate
there is no cascading supported when two "real" filters are given). Also I
do not need two buffers in the cascade. The input to cascade can be fed
directly to the input of cascade->one to fill the cascade's buffer, and we
can feed cascade->two with what is in the cascade's buffer to directly
drain to the output of the cascade.

But that won't happen today.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] streaming filter: ident filter and filter cascading
  2011-05-21 21:25     ` René Scharfe
@ 2011-05-25  1:06       ` Junio C Hamano
  2011-05-25  2:18         ` [PATCH] t0021: test application of both crlf and ident Junio C Hamano
  2011-05-25  2:23         ` [PATCH v2?] streaming: filter cascading Junio C Hamano
  0 siblings, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-25  1:06 UTC (permalink / raw)
  To: René Scharfe; +Cc: git

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> Subject: t0021-conversion.sh: fix NoTerminatingSymbolAtEOF test

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] t0021: test application of both crlf and ident
  2011-05-25  1:06       ` Junio C Hamano
@ 2011-05-25  2:18         ` Junio C Hamano
  2011-05-25  2:23         ` [PATCH v2?] streaming: filter cascading Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-25  2:18 UTC (permalink / raw)
  To: git; +Cc: René Scharfe


Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * This comes on top of René's updates to the test. 

 t/t0021-conversion.sh |   32 ++++++++++++++++++++++++--------
 1 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 275421e..f19e651 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -66,10 +66,14 @@ test_expect_success expanded_in_repo '
 		echo "\$Id:NoSpaceAtEitherEnd\$"
 		echo "\$Id: NoTerminatingSymbol"
 		echo "\$Id: Foreign Commit With Spaces \$"
-		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} > expanded-keywords &&
+	} >expanded-keywords.0 &&
 
-	git add expanded-keywords &&
+	{
+		cat expanded-keywords.0 &&
+		printf "\$Id: NoTerminatingSymbolAtEOF"
+	} >expanded-keywords &&
+	cat expanded-keywords >expanded-keywords-crlf &&
+	git add expanded-keywords expanded-keywords-crlf &&
 	git commit -m "File with keywords expanded" &&
 	id=$(git rev-parse --verify :expanded-keywords) &&
 
@@ -83,15 +87,27 @@ test_expect_success expanded_in_repo '
 		echo "\$Id: $id \$"
 		echo "\$Id: NoTerminatingSymbol"
 		echo "\$Id: Foreign Commit With Spaces \$"
+	} >expected-output.0 &&
+	{
+		cat expected-output.0 &&
 		printf "\$Id: NoTerminatingSymbolAtEOF"
-	} > expected-output &&
+	} >expected-output &&
+	{
+		append_cr <expected-output.0 &&
+		printf "\$Id: NoTerminatingSymbolAtEOF"
+	} >expected-output-crlf &&
+	{
+		echo "expanded-keywords ident"
+		echo "expanded-keywords-crlf ident text eol=crlf"
+	} >>.gitattributes &&
 
-	echo "expanded-keywords ident" >> .gitattributes &&
+	rm -f expanded-keywords expanded-keywords-crlf &&
 
-	rm -f expanded-keywords &&
 	git checkout -- expanded-keywords &&
-	cat expanded-keywords &&
-	cmp expanded-keywords expected-output
+	test_cmp expanded-keywords expected-output &&
+
+	git checkout -- expanded-keywords-crlf &&
+	test_cmp expanded-keywords-crlf expected-output-crlf
 '
 
 # The use of %f in a filter definition is expanded to the path to
-- 
1.7.5.2.483.gc61ca

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2?] streaming: filter cascading
  2011-05-25  1:06       ` Junio C Hamano
  2011-05-25  2:18         ` [PATCH] t0021: test application of both crlf and ident Junio C Hamano
@ 2011-05-25  2:23         ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-05-25  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe

This implements an internal "cascade" filter mechanism that plugs
two filters in series.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * I said that I suspect cascade was not working when I sent out "What's
   cooking", and I was right X-<.

   This replaces what has been queued for a few days.

 convert.c |  126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 112 insertions(+), 14 deletions(-)

diff --git a/convert.c b/convert.c
index 0c42bcb..85939c2 100644
--- a/convert.c
+++ b/convert.c
@@ -915,6 +915,112 @@ static struct stream_filter lf_to_crlf_filter_singleton = {
 
 
 /*
+ * Cascade filter
+ */
+#define FILTER_BUFFER 1024
+struct cascade_filter {
+	struct stream_filter filter;
+	struct stream_filter *one;
+	struct stream_filter *two;
+	char buf[FILTER_BUFFER];
+	int end, ptr;
+};
+
+static int cascade_filter_fn(struct stream_filter *filter,
+			     const char *input, size_t *isize_p,
+			     char *output, size_t *osize_p)
+{
+	struct cascade_filter *cas = (struct cascade_filter *) filter;
+	size_t filled = 0;
+	size_t sz = *osize_p;
+	size_t to_feed, remaining;
+
+	/*
+	 * input -- (one) --> buf -- (two) --> output
+	 */
+	while (filled < sz) {
+		remaining = sz - filled;
+
+		/* do we already have something to feed two with? */
+		if (cas->ptr < cas->end) {
+			to_feed = cas->end - cas->ptr;
+			if (stream_filter(cas->two,
+					  cas->buf + cas->ptr, &to_feed,
+					  output + filled, &remaining))
+				return -1;
+			cas->ptr += (cas->end - cas->ptr) - to_feed;
+			filled = sz - remaining;
+			continue;
+		}
+
+		/* feed one from upstream and have it emit into our buffer */
+		to_feed = input ? *isize_p : 0;
+		if (input && !to_feed)
+			break;
+		remaining = sizeof(cas->buf);
+		if (stream_filter(cas->one,
+				  input, &to_feed,
+				  cas->buf, &remaining))
+			return -1;
+		cas->end = sizeof(cas->buf) - remaining;
+		cas->ptr = 0;
+		if (input) {
+			size_t fed = *isize_p - to_feed;
+			*isize_p -= fed;
+			input += fed;
+		}
+
+		/* do we know that we drained one completely? */
+		if (input || cas->end)
+			continue;
+
+		/* tell two to drain; we have nothing more to give it */
+		to_feed = 0;
+		remaining = sz - filled;
+		if (stream_filter(cas->two,
+				  NULL, &to_feed,
+				  output + filled, &remaining))
+			return -1;
+		if (remaining == (sz - filled))
+			break; /* completely drained two */
+		filled = sz - remaining;
+	}
+	*osize_p -= filled;
+	return 0;
+}
+
+static void cascade_free_fn(struct stream_filter *filter)
+{
+	struct cascade_filter *cas = (struct cascade_filter *)filter;
+	free_stream_filter(cas->one);
+	free_stream_filter(cas->two);
+	free(filter);
+}
+
+static struct stream_filter_vtbl cascade_vtbl = {
+	cascade_filter_fn,
+	cascade_free_fn,
+};
+
+static struct stream_filter *cascade_filter(struct stream_filter *one,
+					    struct stream_filter *two)
+{
+	struct cascade_filter *cascade;
+
+	if (!one || is_null_stream_filter(one))
+		return two;
+	if (!two || is_null_stream_filter(two))
+		return one;
+
+	cascade = xmalloc(sizeof(*cascade));
+	cascade->one = one;
+	cascade->two = two;
+	cascade->end = cascade->ptr = 0;
+	cascade->filter.vtbl = &cascade_vtbl;
+	return (struct stream_filter *)cascade;
+}
+
+/*
  * ident filter
  */
 #define IDENT_DRAINING (-1)
@@ -1083,20 +1189,12 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
 	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
 
 	if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
-	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) {
-		if (filter) {
-			free_stream_filter(filter);
-			return NULL;
-		}
-		return &null_filter_singleton;
-	} else if (output_eol(crlf_action) == EOL_CRLF &&
-		   !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS)) {
-		if (filter) {
-			free_stream_filter(filter);
-			return NULL;
-		}
-		return &lf_to_crlf_filter_singleton;
-	}
+	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
+		filter = cascade_filter(filter, &null_filter_singleton);
+
+	else if (output_eol(crlf_action) == EOL_CRLF &&
+		 !(crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS))
+		filter = cascade_filter(filter, &lf_to_crlf_filter_singleton);
 
 	return filter;
 }
-- 
1.7.5.2.483.gc61ca

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-05-25  2:23 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-21  6:58 [PATCH 0/4] streaming conversion API Junio C Hamano
2011-05-21  6:58 ` [PATCH] convert.h: move declarations for conversion from cache.h Junio C Hamano
2011-05-21  6:58 ` [PATCH 2/4] Add streaming filter API Junio C Hamano
2011-05-21  6:58 ` [PATCH 3/4] Add LF-to-CRLF streaming conversion Junio C Hamano
2011-05-21  6:58 ` [PATCH 4/4] streaming filter: ident filter and filter cascading Junio C Hamano
2011-05-21 21:05   ` René Scharfe
2011-05-21 21:25     ` René Scharfe
2011-05-25  1:06       ` Junio C Hamano
2011-05-25  2:18         ` [PATCH] t0021: test application of both crlf and ident Junio C Hamano
2011-05-25  2:23         ` [PATCH v2?] streaming: filter cascading Junio C Hamano
2011-05-21 22:08     ` [PATCH 4/4] streaming filter: ident filter and " René Scharfe
2011-05-22  1:00       ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.