All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators
@ 2017-03-30  3:32 Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance Daniel Ferreira
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

This is the fifth version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use dir_iterator.

v1: https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsAh16ANYg@mail.gmail.com/T/#t
v2: https://public-inbox.org/git/CACsJy8Dxh-QPBBLfaFWPAWUsbA9GVXA7x+mXLjEvYKhk1zOpig@mail.gmail.com/T/#t
v3: https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3JBFjKhWCdv_LPhKCd71ZRwMovA@mail.gmail.com/T/#t
v4: https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnmvco@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a

I would like to really thank Michael for the incredibly thorough review of
the last version of this series. I never expected anyone to give that
level of attention to this change, and it's really, really appreciated.

All of the points he addressed are fixed in this version. As always, more
feedback is greatly appreciated.

Thanks,
Daniel.

Daniel Ferreira (6):
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  dir_iterator: iterate over dir after its contents
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): reimplement using iterators
  remove_subtree(): test removing nested directories

 Makefile                        |   1 +
 dir-iterator.c                  | 123 ++++++++++++++++++++++++++++++----------
 dir-iterator.h                  |  17 ++++--
 entry.c                         |  41 +++++---------
 refs/files-backend.c            |   2 +-
 t/helper/test-dir-iterator.c    |  32 +++++++++++
 t/t0065-dir-iterator.sh         |  45 +++++++++++++++
 t/t2000-checkout-cache-clash.sh |  11 ++++
 8 files changed, 210 insertions(+), 62 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 2/6] dir_iterator: refactor state machine model Daniel Ferreira
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

Create inline helpers to dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 dir-iterator.c | 65 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..ce8bf81 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
 	struct dir_iterator_level *levels;
 };

+static inline void push_dir_level(struct dir_iterator_int *iter, struct dir_iterator_level *level)
+{
+	level->dir_state = DIR_STATE_RECURSE;
+	ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+		   iter->levels_alloc);
+	level = &iter->levels[iter->levels_nr++];
+	level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter)
+{
+	return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct dir_iterator_level *level)
+{
+	if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
+		if (errno != ENOENT)
+			warning("error reading path '%s': %s",
+				iter->base.path.buf,
+				strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * We have to set these each time because
+	 * the path strbuf might have been realloc()ed.
+	 */
+	iter->base.relative_path =
+		iter->base.path.buf + iter->levels[0].prefix_len;
+	iter->base.basename =
+		iter->base.path.buf + level->prefix_len;
+	level->dir_state = DIR_STATE_ITER;
+
+	return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
 	struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 				 * over; now prepare to iterate into
 				 * it.
 				 */
-				level->dir_state = DIR_STATE_RECURSE;
-				ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-					   iter->levels_alloc);
-				level = &iter->levels[iter->levels_nr++];
-				level->initialized = 0;
+				push_dir_level(iter, level);
 				continue;
 			} else {
 				/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			 * This level is exhausted (or wasn't opened
 			 * successfully); pop up a level.
 			 */
-			if (--iter->levels_nr == 0)
+			if (pop_dir_level(iter) == 0)
 				return dir_iterator_abort(dir_iterator);

 			continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 						iter->base.path.buf, strerror(errno));

 				level->dir = NULL;
-				if (--iter->levels_nr == 0)
+				if (pop_dir_level(iter) == 0)
 					return dir_iterator_abort(dir_iterator);
 				break;
 			}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 				continue;

 			strbuf_addstr(&iter->base.path, de->d_name);
-			if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
-				if (errno != ENOENT)
-					warning("error reading path '%s': %s",
-						iter->base.path.buf,
-						strerror(errno));
-				continue;
-			}

-			/*
-			 * We have to set these each time because
-			 * the path strbuf might have been realloc()ed.
-			 */
-			iter->base.relative_path =
-				iter->base.path.buf + iter->levels[0].prefix_len;
-			iter->base.basename =
-				iter->base.path.buf + level->prefix_len;
-			level->dir_state = DIR_STATE_ITER;
+			if (set_iterator_data(iter, level))
+				continue;

 			return ITER_OK;
 		}
--
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 2/6] dir_iterator: refactor state machine model
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30  8:18   ` Michael Haggerty
  2017-03-30  3:32 ` [PATCH v5 3/6] dir_iterator: iterate over dir after its contents Daniel Ferreira
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

Remove the "initialized" member of dir_iterator_level. Replace its
functionality with a DIR_STATE_PUSH state in the
dir_iterator_level.dir_state enum.

This serves to remove a redundant property in the dir_iterator_level
struct and ease comprehension of the state machine's behavior.

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 dir-iterator.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index ce8bf81..3ac984b 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"

 struct dir_iterator_level {
-	int initialized;
-
 	DIR *dir;

 	/*
@@ -20,6 +18,7 @@ struct dir_iterator_level {
 	 * iteration and also iterated into):
 	 */
 	enum {
+		DIR_STATE_PUSH,
 		DIR_STATE_ITER,
 		DIR_STATE_RECURSE
 	} dir_state;
@@ -55,8 +54,10 @@ static inline void push_dir_level(struct dir_iterator_int *iter, struct dir_iter
 	level->dir_state = DIR_STATE_RECURSE;
 	ALLOC_GROW(iter->levels, iter->levels_nr + 1,
 		   iter->levels_alloc);
+
+	/* Push a new level */
 	level = &iter->levels[iter->levels_nr++];
-	level->initialized = 0;
+	level->dir_state = DIR_STATE_PUSH;
 }

 static inline int pop_dir_level(struct dir_iterator_int *iter)
@@ -97,7 +98,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			&iter->levels[iter->levels_nr - 1];
 		struct dirent *de;

-		if (!level->initialized) {
+		if (level->dir_state == DIR_STATE_PUSH) {
 			/*
 			 * Note: dir_iterator_begin() ensures that
 			 * path is not the empty string.
@@ -112,8 +113,6 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 					iter->base.path.buf, strerror(errno));
 				/* Popping the level is handled below */
 			}
-
-			level->initialized = 1;
 		} else if (S_ISDIR(iter->base.st.st_mode)) {
 			if (level->dir_state == DIR_STATE_ITER) {
 				/*
@@ -215,7 +214,7 @@ struct dir_iterator *dir_iterator_begin(const char *path)
 	ALLOC_GROW(iter->levels, 10, iter->levels_alloc);

 	iter->levels_nr = 1;
-	iter->levels[0].initialized = 0;
+	iter->levels[0].dir_state = DIR_STATE_PUSH;

 	return dir_iterator;
 }
--
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 3/6] dir_iterator: iterate over dir after its contents
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 2/6] dir_iterator: refactor state machine model Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30 11:03   ` Michael Haggerty
  2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned "depth-first" iteration mode to be enabled. Currently,
the only acceptable flag is DIR_ITERATOR_DEPTH_FIRST.

This is useful for recursively removing a directory and calling rmdir()
on a directory only after all of its contents have been wiped.

Amend a call to dir_iterator_begin() to pass the flags parameter
introduced.

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 dir-iterator.c       | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----
 dir-iterator.h       | 17 ++++++++++++-----
 refs/files-backend.c |  2 +-
 3 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 3ac984b..05d53d2 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -47,6 +47,9 @@ struct dir_iterator_int {
 	 * that will be included in this iteration.
 	 */
 	struct dir_iterator_level *levels;
+
+	/* Holds the flags passed to dir_iterator_begin(). */
+	unsigned flags;
 };
 
 static inline void push_dir_level(struct dir_iterator_int *iter, struct dir_iterator_level *level)
@@ -113,12 +116,14 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 					iter->base.path.buf, strerror(errno));
 				/* Popping the level is handled below */
 			}
-		} else if (S_ISDIR(iter->base.st.st_mode)) {
+		} else if (S_ISDIR(iter->base.st.st_mode) &&
+			!(iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)) {
 			if (level->dir_state == DIR_STATE_ITER) {
 				/*
 				 * The directory was just iterated
 				 * over; now prepare to iterate into
-				 * it.
+				 * it (unless an option is set for us
+				 * to do otherwise).
 				 */
 				push_dir_level(iter, level);
 				continue;
@@ -152,7 +157,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			de = readdir(level->dir);
 
 			if (!de) {
-				/* This level is exhausted; pop up a level. */
+				/* This level is exhausted  */
 				if (errno) {
 					warning("error reading directory %s: %s",
 						iter->base.path.buf, strerror(errno));
@@ -160,6 +165,32 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 					warning("error closing directory %s: %s",
 						iter->base.path.buf, strerror(errno));
 
+				if (iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL) {
+					/* If we are handling dirpaths after their contents,
+					 * we have to iterate over the directory now that we'll
+					 * have finished iterating into it. */
+					level->dir = NULL;
+
+					if (pop_dir_level(iter) == 0)
+						return dir_iterator_abort(dir_iterator);
+
+					level = &iter->levels[iter->levels_nr - 1];
+					/* Since we are iterating through the dirpath
+					 * after we have gone through it, we still need
+					 * to get rid of the trailing slash we appended.
+					 *
+					 * This may generate issues if we ever want to
+					 * iterate through the root directory AND have
+					 * post-order traversal enabled.
+					 */
+					strbuf_strip_suffix(&iter->base.path, "/");
+
+					if (set_iterator_data(iter, level))
+						continue;
+
+					return ITER_OK;
+				}
+
 				level->dir = NULL;
 				if (pop_dir_level(iter) == 0)
 					return dir_iterator_abort(dir_iterator);
@@ -174,6 +205,18 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			if (set_iterator_data(iter, level))
 				continue;
 
+			/*
+			 * If we want to iterate dirs after files, we shall
+			 * begin looking into them *before* we return the dir
+			 * itself.
+			 */
+			if (S_ISDIR(iter->base.st.st_mode) &&
+				(iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)) {
+				push_dir_level(iter, level);
+
+				break;
+			}
+
 			return ITER_OK;
 		}
 	}
@@ -200,7 +243,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
 	return ITER_DONE;
 }
 
-struct dir_iterator *dir_iterator_begin(const char *path)
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags)
 {
 	struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
 	struct dir_iterator *dir_iterator = &iter->base;
@@ -208,6 +251,8 @@ struct dir_iterator *dir_iterator_begin(const char *path)
 	if (!path || !*path)
 		die("BUG: empty path passed to dir_iterator_begin()");
 
+	iter->flags = flags;
+
 	strbuf_init(&iter->base.path, PATH_MAX);
 	strbuf_addstr(&iter->base.path, path);
 
diff --git a/dir-iterator.h b/dir-iterator.h
index 27739e6..649ccf6 100644
--- a/dir-iterator.h
+++ b/dir-iterator.h
@@ -11,8 +11,7 @@
  * Every time dir_iterator_advance() is called, update the members of
  * the dir_iterator structure to reflect the next path in the
  * iteration. The order that paths are iterated over within a
- * directory is undefined, but directory paths are always iterated
- * over before the subdirectory contents.
+ * directory is undefined.
  *
  * A typical iteration looks like this:
  *
@@ -38,6 +37,13 @@
  * dir_iterator_advance() again.
  */
 
+/* Possible flags for dir_iterator_begin().
+ *
+ * DIR_ITERATOR_POST_ORDER_TRAVERSAL: ensures subdirectories and
+ * their contents are iterated through before the containing directory.
+ */
+#define DIR_ITERATOR_POST_ORDER_TRAVERSAL (1 << 0)
+
 struct dir_iterator {
 	/* The current path: */
 	struct strbuf path;
@@ -57,15 +63,16 @@ struct dir_iterator {
 };
 
 /*
- * Start a directory iteration over path. Return a dir_iterator that
- * holds the internal state of the iteration.
+ * Start a directory iteration over path, with options specified in
+ * 'flags'. Return a dir_iterator that holds the internal state of
+ * the iteration.
  *
  * The iteration includes all paths under path, not including path
  * itself and not including "." or ".." entries.
  *
  * path is the starting directory. An internal copy will be made.
  */
-struct dir_iterator *dir_iterator_begin(const char *path);
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags);
 
 /*
  * Advance the iterator to the first or next item and return ITER_OK.
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 50188e9..b4bba74 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3346,7 +3346,7 @@ static struct ref_iterator *files_reflog_iterator_begin(struct ref_store *ref_st
 	files_downcast(ref_store, 0, "reflog_iterator_begin");
 
 	base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable);
-	iter->dir_iterator = dir_iterator_begin(git_path("logs"));
+	iter->dir_iterator = dir_iterator_begin(git_path("logs"), 0);
 	return ref_iterator;
 }
 
-- 
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
                   ` (2 preceding siblings ...)
  2017-03-30  3:32 ` [PATCH v5 3/6] dir_iterator: iterate over dir after its contents Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30  7:46   ` Michael Haggerty
                     ` (2 more replies)
  2017-03-30  3:32 ` [PATCH v5 5/6] remove_subtree(): reimplement using iterators Daniel Ferreira
                   ` (2 subsequent siblings)
  6 siblings, 3 replies; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree and that post-order directory
iteration is correctly implemented.

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 Makefile                     |  1 +
 t/helper/test-dir-iterator.c | 32 +++++++++++++++++++++++++++++++
 t/t0065-dir-iterator.sh      | 45 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index a5a11e7..d0245f3 100644
--- a/Makefile
+++ b/Makefile
@@ -607,6 +607,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 0000000..b4a148f
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,32 @@
+#include "cache.h"
+#include "blob.h"
+#include "dir.h"
+#include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv) {
+	if (argc < 2) {
+		return 1;
+	}
+
+	struct strbuf path = STRBUF_INIT;
+	strbuf_add(&path, argv[1], strlen(argv[1]));
+
+	unsigned flag = 0;
+	if (argc == 3 && strcmp(argv[2], "--post-order") == 0)
+		flag = DIR_ITERATOR_POST_ORDER_TRAVERSAL;
+
+	struct dir_iterator *diter = dir_iterator_begin((&path)->buf, flag);
+
+	while (dir_iterator_advance(diter) == ITER_OK) {
+		if (S_ISDIR(diter->st.st_mode))
+			printf("[d] ");
+		else
+			printf("[f] ");
+
+		printf("(%s) %s\n", diter->relative_path, diter->path.buf);
+	}
+
+	return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 0000000..3c8ea9a
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+ITER_SORTED_OUTPUT='[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[d] (d) ./dir/d
+[d] (d/e) ./dir/d/e
+[d] (d/e/d) ./dir/d/e/d
+[f] (a/b/c/d) ./dir/a/b/c/d
+[f] (a/e) ./dir/a/e
+[f] (b) ./dir/b
+[f] (c) ./dir/c
+[f] (d/e/d/a) ./dir/d/e/d/a'
+
+test_expect_success 'dir-iterator should iterate through all files' '
+	mkdir -p dir &&
+	mkdir -p dir/a/b/c/ &&
+	date >dir/b &&
+	date >dir/c &&
+	mkdir -p dir/d/e/d/ &&
+	date >dir/a/b/c/d &&
+	date >dir/a/e &&
+	date >dir/d/e/d/a &&
+
+	test-dir-iterator ./dir >it &&
+	test "$(sort it)" == "$ITER_SORTED_OUTPUT"
+'
+
+ITER_POST_ORDER_OUTPUT='[f] (a/b/c/d) ./dir2/a/b/c/d
+[d] (a/b/c) ./dir2/a/b/c
+[d] (a/b) ./dir2/a/b
+[d] (a) ./dir2/a'
+
+test_expect_success 'dir-iterator should list files properly on post-order mode' '
+	mkdir -p dir2/a/b/c/ &&
+	date >dir2/a/b/c/d &&
+
+	test "$(test-dir-iterator ./dir2 --post-order)" == "$ITER_POST_ORDER_OUTPUT"
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 5/6] remove_subtree(): reimplement using iterators
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
                   ` (3 preceding siblings ...)
  2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30  3:32 ` [PATCH v5 6/6] remove_subtree(): test removing nested directories Daniel Ferreira
  2017-03-30 11:27 ` [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Michael Haggerty
  6 siblings, 0 replies; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira, Daniel Ferreira

From: Daniel Ferreira <daniel.calibeta@gmail.com>

Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 entry.c | 41 +++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..30197b2 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
 			       const struct checkout *state)
@@ -44,33 +46,20 @@ static void create_directories(const char *path, int path_len,
 	free(buf);
 }

-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-	DIR *dir = opendir(path->buf);
-	struct dirent *de;
-	int origlen = path->len;
-
-	if (!dir)
-		die_errno("cannot opendir '%s'", path->buf);
-	while ((de = readdir(dir)) != NULL) {
-		struct stat st;
-
-		if (is_dot_or_dotdot(de->d_name))
-			continue;
-
-		strbuf_addch(path, '/');
-		strbuf_addstr(path, de->d_name);
-		if (lstat(path->buf, &st))
-			die_errno("cannot lstat '%s'", path->buf);
-		if (S_ISDIR(st.st_mode))
-			remove_subtree(path);
-		else if (unlink(path->buf))
-			die_errno("cannot unlink '%s'", path->buf);
-		strbuf_setlen(path, origlen);
+	struct dir_iterator *diter = dir_iterator_begin(path, DIR_ITERATOR_POST_ORDER_TRAVERSAL);
+
+	while (dir_iterator_advance(diter) == ITER_OK) {
+		if (S_ISDIR(diter->st.st_mode)) {
+			if (rmdir(diter->path.buf))
+				die_errno("cannot rmdir '%s'", diter->path.buf);
+		} else if (unlink(diter->path.buf))
+			die_errno("cannot unlink '%s'", diter->path.buf);
 	}
-	closedir(dir);
-	if (rmdir(path->buf))
-		die_errno("cannot rmdir '%s'", path->buf);
+
+	if (rmdir(path))
+		die_errno("cannot rmdir '%s'", path);
 }

 static int create_file(const char *path, unsigned int mode)
@@ -282,7 +271,7 @@ int checkout_entry(struct cache_entry *ce,
 				return 0;
 			if (!state->force)
 				return error("%s is a directory", path.buf);
-			remove_subtree(&path);
+			remove_subtree(path.buf);
 		} else if (unlink(path.buf))
 			return error_errno("unable to unlink old '%s'", path.buf);
 	} else if (state->not_new)
--
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 6/6] remove_subtree(): test removing nested directories
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
                   ` (4 preceding siblings ...)
  2017-03-30  3:32 ` [PATCH v5 5/6] remove_subtree(): reimplement using iterators Daniel Ferreira
@ 2017-03-30  3:32 ` Daniel Ferreira
  2017-03-30 11:07   ` Michael Haggerty
  2017-03-30 11:27 ` [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Michael Haggerty
  6 siblings, 1 reply; 19+ messages in thread
From: Daniel Ferreira @ 2017-03-30  3:32 UTC (permalink / raw)
  To: git; +Cc: gitster, sbeller, pclouds, mhagger, Daniel Ferreira

Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' '
 	git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+	echo content >path &&
+	git update-index --add path &&
+	rm path &&
+	mkdir -p path/with/nested/paths &&
+	echo content >path/file1 &&
+	echo content >path/with/nested/paths/file2 &&
+	git checkout-index -f -a &&
+	test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
@ 2017-03-30  7:46   ` Michael Haggerty
  2017-03-30 18:25     ` Daniel Ferreira (theiostream)
  2017-03-30  7:48   ` Michael Haggerty
  2017-03-30  8:05   ` Michael Haggerty
  2 siblings, 1 reply; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30  7:46 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Create t/helper/test-dir-iterator.c, which prints relevant information
> about a directory tree iterated over with dir_iterator.
> 
> Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
> iterate through a whole directory tree and that post-order directory
> iteration is correctly implemented.

Thanks for adding these tests (which really I should have added when I
first introduced `dir_iterator`).

As a general rule, it's best to add tests:

* of pre-existing functionality as soon as possible in a patch
  series so that the tests can be used to check your own work,
  commit by commit

* of new functionality in the commit that adds the new functionality,
  so that it is obvious that the new functionality works

* of fixes to existing bugs *before* the bug fix, as a
  `test_expect_failure` test, to prove that the bug exists and that
  your test catches it. Then in a second commit, actually fix the bug
  and change the test from `test_expect_failure` to
  `test_expect_success`, to prove that your change fixed the bug.

Concretely, for this patch series, that means that you should add part
of the helper program and part of t0065 (i.e., everything that applies
to pre-order iteration) as the first patch of your series. Then you
should add the rest in patch "dir_iterator: iterate over dir after its
contents", at the same time as you add post-order iteration.

> ---
>  Makefile                     |  1 +
>  t/helper/test-dir-iterator.c | 32 +++++++++++++++++++++++++++++++
>  t/t0065-dir-iterator.sh      | 45 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 78 insertions(+)
>  create mode 100644 t/helper/test-dir-iterator.c
>  create mode 100755 t/t0065-dir-iterator.sh
> 
> diff --git a/Makefile b/Makefile
> index a5a11e7..d0245f3 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -607,6 +607,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
>  TEST_PROGRAMS_NEED_X += test-config
>  TEST_PROGRAMS_NEED_X += test-date
>  TEST_PROGRAMS_NEED_X += test-delta
> +TEST_PROGRAMS_NEED_X += test-dir-iterator
>  TEST_PROGRAMS_NEED_X += test-dump-cache-tree
>  TEST_PROGRAMS_NEED_X += test-dump-split-index
>  TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
> diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
> new file mode 100644
> index 0000000..b4a148f
> --- /dev/null
> +++ b/t/helper/test-dir-iterator.c
> @@ -0,0 +1,32 @@
> +#include "cache.h"
> +#include "blob.h"
> +#include "dir.h"
> +#include "streaming.h"
> +#include "iterator.h"
> +#include "dir-iterator.h"

Three of these includes are unneeded and should be removed.

> +
> +int cmd_main(int argc, const char **argv) {
> +	if (argc < 2) {
> +		return 1;
> +	}
> +
> +	struct strbuf path = STRBUF_INIT;
> +	strbuf_add(&path, argv[1], strlen(argv[1]));
> +
> +	unsigned flag = 0;
> +	if (argc == 3 && strcmp(argv[2], "--post-order") == 0)
> +		flag = DIR_ITERATOR_POST_ORDER_TRAVERSAL;

It's conventional to handle options *before* other arguments. Even
though this is only a test script, it's preferable to adhere to
convention unless there is a strong reason to deviate from it. You might
looks at `test-submodule-config.c` for example code.

> +	struct dir_iterator *diter = dir_iterator_begin((&path)->buf, flag);

`(&path)->buf` can be shortened to `path.buf`.

> +
> +	while (dir_iterator_advance(diter) == ITER_OK) {
> +		if (S_ISDIR(diter->st.st_mode))
> +			printf("[d] ");
> +		else
> +			printf("[f] ");
> +
> +		printf("(%s) %s\n", diter->relative_path, diter->path.buf);
> +	}
> +
> +	return 0;
> +}
> diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
> new file mode 100755
> index 0000000..3c8ea9a
> --- /dev/null
> +++ b/t/t0065-dir-iterator.sh
> @@ -0,0 +1,45 @@
> +#!/bin/sh
> +
> +test_description='Test directory iteration.'
> +
> +. ./test-lib.sh
> +
> +ITER_SORTED_OUTPUT='[d] (a) ./dir/a
> +[d] (a/b) ./dir/a/b
> +[d] (a/b/c) ./dir/a/b/c
> +[d] (d) ./dir/d
> +[d] (d/e) ./dir/d/e
> +[d] (d/e/d) ./dir/d/e/d
> +[f] (a/b/c/d) ./dir/a/b/c/d
> +[f] (a/e) ./dir/a/e
> +[f] (b) ./dir/b
> +[f] (c) ./dir/c
> +[f] (d/e/d/a) ./dir/d/e/d/a'
> +
> +test_expect_success 'dir-iterator should iterate through all files' '
> +	mkdir -p dir &&
> +	mkdir -p dir/a/b/c/ &&
> +	date >dir/b &&

Is there a special reason to write the date to the file as opposed to, say

    touch dir/b

? (Some people use `: >dir/b` for this purpose, though I've never found
out why.) If you write the date to the file, the reader will be
distracted unnecessarily wondering whether the contents are important to
the test.

> +	date >dir/c &&
> +	mkdir -p dir/d/e/d/ &&
> +	date >dir/a/b/c/d &&
> +	date >dir/a/e &&
> +	date >dir/d/e/d/a &&
> +
> +	test-dir-iterator ./dir >it &&
> +	test "$(sort it)" == "$ITER_SORTED_OUTPUT"

We generally prefer to write the expected and actual program outputs to
files, then `test_cmp` the files. This has a couple of advantages:

* It doesn't risk missing an error return from the program under test,
  which could happen in your second test below.

* If there is a disagreement, `test_cmp` shows how the files differed.

* If you run the test using `-d`, you can inspect the contents of the
  files by hand to help investigate what happened.

Typically that would be done like

	cat >expected-sorted <<-\EOF &&
	[d] (a) ./dir/a
	[d] (a/b) ./dir/a/b
	[d] (a/b/c) ./dir/a/b/c
	[d] (d) ./dir/d
	[d] (d/e) ./dir/d/e
	[d] (d/e/d) ./dir/d/e/d
	[f] (a/b/c/d) ./dir/a/b/c/d
	[f] (a/e) ./dir/a/e
	[f] (b) ./dir/b
	[f] (c) ./dir/c
	[f] (d/e/d/a) ./dir/d/e/d/a
	EOF

	test-dir-iterator ./dir >actual &&
	sort actual >actual-sorted &&
	test_cmp expected-sorted actual-sorted

I also think it would be prudent to run this test twice; once for
pre-order and once for post-order traversal. You could use the same
directory setup and the same `expected-sorted` file for both tests
(e.g., set them up in a preliminary "setup" step).

> +'
> +
> +ITER_POST_ORDER_OUTPUT='[f] (a/b/c/d) ./dir2/a/b/c/d
> +[d] (a/b/c) ./dir2/a/b/c
> +[d] (a/b) ./dir2/a/b
> +[d] (a) ./dir2/a'
> +
> +test_expect_success 'dir-iterator should list files properly on post-order mode' '
> +	mkdir -p dir2/a/b/c/ &&
> +	date >dir2/a/b/c/d &&
> +
> +	test "$(test-dir-iterator ./dir2 --post-order)" == "$ITER_POST_ORDER_OUTPUT"
> +'

How about adding a test for pre-order traversal, too? It would look
almost identical to this test.

> [...]

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
  2017-03-30  7:46   ` Michael Haggerty
@ 2017-03-30  7:48   ` Michael Haggerty
  2017-03-30  8:05   ` Michael Haggerty
  2 siblings, 0 replies; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30  7:48 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Create t/helper/test-dir-iterator.c, which prints relevant information
> about a directory tree iterated over with dir_iterator.
> 
> Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
> iterate through a whole directory tree and that post-order directory
> iteration is correctly implemented.

Please also add the compiled test program `test-dir-iterator` to
`t/helper/.gitignore`.

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
  2017-03-30  7:46   ` Michael Haggerty
  2017-03-30  7:48   ` Michael Haggerty
@ 2017-03-30  8:05   ` Michael Haggerty
  2017-03-30 18:26     ` Daniel Ferreira (theiostream)
  2 siblings, 1 reply; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30  8:05 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Create t/helper/test-dir-iterator.c, which prints relevant information
> about a directory tree iterated over with dir_iterator.
> 
> Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
> iterate through a whole directory tree and that post-order directory
> iteration is correctly implemented.
> 
> [...]
> diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
> new file mode 100644
> index 0000000..b4a148f
> --- /dev/null
> +++ b/t/helper/test-dir-iterator.c
> @@ -0,0 +1,32 @@
> [...]
> +
> +int cmd_main(int argc, const char **argv) {
> +	if (argc < 2) {
> +		return 1;
> +	}
> +
> +	struct strbuf path = STRBUF_INIT;
> +	strbuf_add(&path, argv[1], strlen(argv[1]));
> +
> +	unsigned flag = 0;
> +	if (argc == 3 && strcmp(argv[2], "--post-order") == 0)
> +		flag = DIR_ITERATOR_POST_ORDER_TRAVERSAL;
> +
> +	struct dir_iterator *diter = dir_iterator_begin((&path)->buf, flag);
> +
> +	while (dir_iterator_advance(diter) == ITER_OK) {
> +		if (S_ISDIR(diter->st.st_mode))
> +			printf("[d] ");
> +		else
> +			printf("[f] ");
> +
> +		printf("(%s) %s\n", diter->relative_path, diter->path.buf);
> +	}
> +
> +	return 0;
> +}
> [...]

Oh I forgot to mention, in the Git project we don't allow declarations
to be mixed with code. Apparently there's some ancient compiler
somewhere that doesn't allow it. Declarations always have to be
together, at the top of a block. (Compile with
`-Werror=declaration-after-statement` to detect this.)

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 2/6] dir_iterator: refactor state machine model
  2017-03-30  3:32 ` [PATCH v5 2/6] dir_iterator: refactor state machine model Daniel Ferreira
@ 2017-03-30  8:18   ` Michael Haggerty
  0 siblings, 0 replies; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30  8:18 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Remove the "initialized" member of dir_iterator_level. Replace its
> functionality with a DIR_STATE_PUSH state in the
> dir_iterator_level.dir_state enum.
> 
> This serves to remove a redundant property in the dir_iterator_level
> struct and ease comprehension of the state machine's behavior.

Nice.

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 3/6] dir_iterator: iterate over dir after its contents
  2017-03-30  3:32 ` [PATCH v5 3/6] dir_iterator: iterate over dir after its contents Daniel Ferreira
@ 2017-03-30 11:03   ` Michael Haggerty
  0 siblings, 0 replies; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30 11:03 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Create an option for the dir_iterator API to iterate over subdirectories
> only after having iterated through their contents. This feature was
> predicted, although not implemented by 0fe5043 ("dir_iterator: new API
> for iterating over a directory tree", 2016-06-18).
> 
> Add the "flags" parameter to dir_iterator_create, allowing for the
> aforementioned "depth-first" iteration mode to be enabled. Currently,
> the only acceptable flag is DIR_ITERATOR_DEPTH_FIRST.

The flag name has been changed.

> This is useful for recursively removing a directory and calling rmdir()
> on a directory only after all of its contents have been wiped.
> 
> Amend a call to dir_iterator_begin() to pass the flags parameter
> introduced.
> 
> Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
> ---
>  dir-iterator.c       | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  dir-iterator.h       | 17 ++++++++++++-----
>  refs/files-backend.c |  2 +-
>  3 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/dir-iterator.c b/dir-iterator.c
> index 3ac984b..05d53d2 100644
> --- a/dir-iterator.c
> +++ b/dir-iterator.c
> @@ -47,6 +47,9 @@ struct dir_iterator_int {
>  	 * that will be included in this iteration.
>  	 */
>  	struct dir_iterator_level *levels;
> +
> +	/* Holds the flags passed to dir_iterator_begin(). */
> +	unsigned flags;
>  };
>  
>  static inline void push_dir_level(struct dir_iterator_int *iter, struct dir_iterator_level *level)
> @@ -113,12 +116,14 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  					iter->base.path.buf, strerror(errno));
>  				/* Popping the level is handled below */
>  			}
> -		} else if (S_ISDIR(iter->base.st.st_mode)) {
> +		} else if (S_ISDIR(iter->base.st.st_mode) &&
> +			!(iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)) {
>  			if (level->dir_state == DIR_STATE_ITER) {
>  				/*
>  				 * The directory was just iterated
>  				 * over; now prepare to iterate into
> -				 * it.
> +				 * it (unless an option is set for us
> +				 * to do otherwise).
>  				 */
>  				push_dir_level(iter, level);
>  				continue;
> @@ -152,7 +157,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  			de = readdir(level->dir);
>  
>  			if (!de) {
> -				/* This level is exhausted; pop up a level. */
> +				/* This level is exhausted  */
>  				if (errno) {
>  					warning("error reading directory %s: %s",
>  						iter->base.path.buf, strerror(errno));
> @@ -160,6 +165,32 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  					warning("error closing directory %s: %s",
>  						iter->base.path.buf, strerror(errno));
>  
> +				if (iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL) {
> +					/* If we are handling dirpaths after their contents,

The comment opening and closing delimiters should be on their own lines.

> +					 * we have to iterate over the directory now that we'll
> +					 * have finished iterating into it. */
> +					level->dir = NULL;

`level->dir = NULL` can be done before the `if` statement rather than
both here and after the `if` statement.

> +
> +					if (pop_dir_level(iter) == 0)
> +						return dir_iterator_abort(dir_iterator);
> +
> +					level = &iter->levels[iter->levels_nr - 1];
> +					/* Since we are iterating through the dirpath

Comment opening should be on its own line.

> +					 * after we have gone through it, we still need
> +					 * to get rid of the trailing slash we appended.
> +					 *
> +					 * This may generate issues if we ever want to
> +					 * iterate through the root directory AND have
> +					 * post-order traversal enabled.
> +					 */
> +					strbuf_strip_suffix(&iter->base.path, "/");

Hmmm, I was wrong again. Remember that this code path is stripping the
"/" from the directory that has just been iterated over, and has already
been closed and popped off the stack. But if we are iterating over "/",
then when it is popped off the stack, `pop_dir_level(iter)` returns
zero, and we never get to this line. So I think you can remove this warning.

> +
> +					if (set_iterator_data(iter, level))
> +						continue;
> +
> +					return ITER_OK;
> +				}
> +
>  				level->dir = NULL;
>  				if (pop_dir_level(iter) == 0)
>  					return dir_iterator_abort(dir_iterator);
> @@ -174,6 +205,18 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  			if (set_iterator_data(iter, level))
>  				continue;
>  
> +			/*
> +			 * If we want to iterate dirs after files, we shall
> +			 * begin looking into them *before* we return the dir
> +			 * itself.
> +			 */
> +			if (S_ISDIR(iter->base.st.st_mode) &&
> +				(iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)) {
> +				push_dir_level(iter, level);
> +
> +				break;
> +			}
> +
>  			return ITER_OK;
>  		}
>  	}
> @@ -200,7 +243,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
>  	return ITER_DONE;
>  }
>  
> -struct dir_iterator *dir_iterator_begin(const char *path)
> +struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags)
>  {
>  	struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
>  	struct dir_iterator *dir_iterator = &iter->base;
> @@ -208,6 +251,8 @@ struct dir_iterator *dir_iterator_begin(const char *path)
>  	if (!path || !*path)
>  		die("BUG: empty path passed to dir_iterator_begin()");
>  
> +	iter->flags = flags;
> +
>  	strbuf_init(&iter->base.path, PATH_MAX);
>  	strbuf_addstr(&iter->base.path, path);
>  
> diff --git a/dir-iterator.h b/dir-iterator.h
> index 27739e6..649ccf6 100644
> --- a/dir-iterator.h
> +++ b/dir-iterator.h
> @@ -11,8 +11,7 @@
>   * Every time dir_iterator_advance() is called, update the members of
>   * the dir_iterator structure to reflect the next path in the
>   * iteration. The order that paths are iterated over within a
> - * directory is undefined, but directory paths are always iterated
> - * over before the subdirectory contents.
> + * directory is undefined.
>   *
>   * A typical iteration looks like this:
>   *
> @@ -38,6 +37,13 @@
>   * dir_iterator_advance() again.
>   */
>  
> +/* Possible flags for dir_iterator_begin().

Comment opening should be on its own line.

> [...]

So, using your handy-dandy `test-dir-iterator` program, I did a bunch of
manual testing.

It's a little bit unfortunate (though certainly not a showstopper) that
`lstat()` is called twice on directories in `POST_ORDER` mode.

More significant is that pre-order traversal includes unreadable
directory entries (e.g., those whose permissions don't allow reading),
whereas post-order traversal omits them. The reason is that when
`opendir()` fails, the `if (!level->dir)` code block pops it off the
stack before the `if (iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)`
block gets a chance to return it to the caller.

This is not great. It seems like callers would want to know about
unreadable directories within the tree.

I haven't checked whether that problem can be fixed up easily in the
current code. But I'm getting convinced that the code (i.e., even my
code, before your changes) is more convoluted than it needs to be, and
that makes it too hard to work with. Quite likely you have the same
feeling by now... :-/

Imagine something like this:

Change the `dir_state` enumeration to the following:

* DIR_STATE_PUSHED -- the top-of-stack directory has just been pushed
  onto the stack but not iterated over or opened.
* DIR_STATE_PRE_ITERATED -- the top-of-stack directory has just been
  returned to the caller (pre-order), but not opened.
* DIR_STATE_ITERATING -- the top-of-stack directory has been opened and
  we are currently iterating over its entries.
* DIR_STATE_ITERATED -- the top-of-stack directory's entries have all
  been iterated over, and the directory has been closed.
* DIR_STATE_POST_ITERATED -- the top-of-stack directory has been
  returned to the caller (post-order) and is ready to be popped.

Then the function could have a single loop with a more conventional
state machine structure *doing exactly one thing per iteration*,
something like:

while (1) {
	if (level->dir_state == DIR_STATE_PUSHED) {
		if (iter->flags & DIR_ITERATOR_DIRS_BEFORE) {
			...set iterator data...
			level->dir_state = DIR_STATE_PRE_ITERATED;
			return ITER_OK;
		} else {
			level->dir_state = DIR_STATE_PRE_ITERATED;
		}
	} else if (level->dir_state == DIR_STATE_PRE_ITERATED) {
		if (iter->flags & DIR_ITERATOR_RECURSE) {
			...open directory...
			level->dir_state = DIR_STATE_ITERATING;
		} else {
			level->dir_state = DIR_STATE_ITERATED;
		}
	} else if (level->dir_state == DIR_STATE_PRE_ITERATING) {
		if (readdir(...)) {
			if (S_ISDIR) {
				...push dir...
				level->dir_state = DIR_STATE_PUSHED;
			} else {
				...set iterator data...
				return ITER_OK;
			}
		} else {
			level->dir_state = DIR_STATE_ITERATED;
		}
	} else if (level->dir_state == DIR_STATE_ITERATED) {
		if (iter->flags & DIR_ITERATOR_DIRS_AFTER) {
			...set iterator data...
			level->dir_state = DIR_STATE_POST_ITERATED;
			return ITER_OK;
		} else {
			level->dir_state = DIR_STATE_POST_ITERATED;
		}
	} else if (level->dir_state == DIR_STATE_POST_ITERATED) {
		...pop dir...
		if (...done...) {
			break;
		}
	}
}

It's a biggish rewrite (and I'm not implying that you have to do it),
but I think the results would be more readable than the current code.

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 6/6] remove_subtree(): test removing nested directories
  2017-03-30  3:32 ` [PATCH v5 6/6] remove_subtree(): test removing nested directories Daniel Ferreira
@ 2017-03-30 11:07   ` Michael Haggerty
  0 siblings, 0 replies; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30 11:07 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> Test removing a nested directory when an attempt is made to restore the
> index to a state where it does not exist. A similar test could be found
> previously in t/t2000-checkout-cache-clash.sh, but it did not check for
> nested directories, which could allow a faulty implementation of
> remove_subtree() pass the tests.
> 
> Signed-off-by: Daniel Ferreira <bnmvco@gmail.com>
> ---
>  t/t2000-checkout-cache-clash.sh | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
> index de3edb5..ac10ba3 100755
> --- a/t/t2000-checkout-cache-clash.sh
> +++ b/t/t2000-checkout-cache-clash.sh
> @@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' '
>  	git checkout-index -a -f --prefix=there/
>  '
>  
> +test_expect_success 'git checkout-index -f should remove nested subtrees' '
> +	echo content >path &&
> +	git update-index --add path &&
> +	rm path &&
> +	mkdir -p path/with/nested/paths &&
> +	echo content >path/file1 &&
> +	echo content >path/with/nested/paths/file2 &&
> +	git checkout-index -f -a &&
> +	test ! -d path
> +'
> +
>  test_done
> 

It would be better for this patch to precede "remove_subtree():
reimplement using iterators", as a slightly better proof that the change
to using iterators doesn't change the behavior.

Michael


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators
  2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
                   ` (5 preceding siblings ...)
  2017-03-30  3:32 ` [PATCH v5 6/6] remove_subtree(): test removing nested directories Daniel Ferreira
@ 2017-03-30 11:27 ` Michael Haggerty
  2017-03-30 12:10   ` Duy Nguyen
  6 siblings, 1 reply; 19+ messages in thread
From: Michael Haggerty @ 2017-03-30 11:27 UTC (permalink / raw)
  To: Daniel Ferreira, git; +Cc: gitster, sbeller, pclouds

On 03/30/2017 05:32 AM, Daniel Ferreira wrote:
> This is the fifth version of a patch series that implements the GSoC
> microproject of converting a recursive call to readdir() to use dir_iterator.
> 
> v1: https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsAh16ANYg@mail.gmail.com/T/#t
> v2: https://public-inbox.org/git/CACsJy8Dxh-QPBBLfaFWPAWUsbA9GVXA7x+mXLjEvYKhk1zOpig@mail.gmail.com/T/#t
> v3: https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3JBFjKhWCdv_LPhKCd71ZRwMovA@mail.gmail.com/T/#t
> v4: https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnmvco@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
> 
> I would like to really thank Michael for the incredibly thorough review of
> the last version of this series. I never expected anyone to give that
> level of attention to this change, and it's really, really appreciated.
> 
> All of the points he addressed are fixed in this version. As always, more
> feedback is greatly appreciated.

Thanks for the reroll. I have reviewed v5 and it's much improved
relative to v4. I'm very encouraged by your quick and constructive
responses and especially that you are sticking with it even though the
project is getting much, much more involved than a typical GSoC
microproject. Perseverance is a very important prerequisite to success
in the Git project.

Please pay attention to formatting trivia like the formatting of
multiline constants. It's boring to give the same feedback multiple
times [1].

It's common that nontrivial patch series have to go though many
iterations before being accepted, so don't get discouraged. Even
experienced developers sometimes get past v10! The important thing is
that every round gets better than the last, that we learn from the
experience together, and that you don't give up, and then it is almost
certain that it will converge eventually!

I can't speak for the GSoC mentors who will make the decision, but I
would expect that your taking on an ambitious project and sticking with
it will leave a good impression, even if you don't quite get your change
accepted by whatever deadline. So don't panic that you are ruining your
chances by picking an ambitious project; quite the opposite is more
likely to be the case.

Michael

[1] softwareswirl.blogspot.com/2014/03/my-secret-tip-for-gsoc-success.html


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators
  2017-03-30 11:27 ` [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Michael Haggerty
@ 2017-03-30 12:10   ` Duy Nguyen
  0 siblings, 0 replies; 19+ messages in thread
From: Duy Nguyen @ 2017-03-30 12:10 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: Daniel Ferreira, Git Mailing List, Junio C Hamano, Stefan Beller

On Thu, Mar 30, 2017 at 6:27 PM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
> I'm very encouraged by your quick and constructive
> responses and especially that you are sticking with it even though the
> project is getting much, much more involved than a typical GSoC
> microproject.

I'm impressed too. The new series made me want to read it, probably
because it's so easy to get into. Unfortunately I don't know enough
about dir-iterator.c to contribute any good feedback. And other minor
things have been all caught by you :)

> It's common that nontrivial patch series have to go though many
> iterations before being accepted, so don't get discouraged. Even
> experienced developers sometimes get past v10!

Pffft.. v10 is for amateurs. Experienced devs aim for v20+!
-- 
Duy

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  7:46   ` Michael Haggerty
@ 2017-03-30 18:25     ` Daniel Ferreira (theiostream)
  2017-04-01  9:03       ` Jeff King
  0 siblings, 1 reply; 19+ messages in thread
From: Daniel Ferreira (theiostream) @ 2017-03-30 18:25 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: Git Mailing List, Junio C Hamano, Stefan Beller, Duy Nguyen

On Thu, Mar 30, 2017 at 4:46 AM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
> Is there a special reason to write the date to the file as opposed to, say
>
>     touch dir/b
>
> ? (Some people use `: >dir/b` for this purpose, though I've never found
> out why.) If you write the date to the file, the reader will be
> distracted unnecessarily wondering whether the contents are important to
> the test.
>

There's no reason. They will be `touch`ed instead of written in a next version.

`:` is a bash builtin that that returns an exit code of zero and
produces no output. On my Mac at least:

bash-3.2$ type :
: is a shell builtin
bash-3.2$ type touch
touch is /usr/bin/touch

I suppose there are reasons to try to keep the most of a shell
script's logic within the shell itself, without involving external
binaries.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30  8:05   ` Michael Haggerty
@ 2017-03-30 18:26     ` Daniel Ferreira (theiostream)
  0 siblings, 0 replies; 19+ messages in thread
From: Daniel Ferreira (theiostream) @ 2017-03-30 18:26 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: Git Mailing List, Junio C Hamano, Stefan Beller, Duy Nguyen

On Thu, Mar 30, 2017 at 5:05 AM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
> Oh I forgot to mention, in the Git project we don't allow declarations
> to be mixed with code. Apparently there's some ancient compiler
> somewhere that doesn't allow it. Declarations always have to be
> together, at the top of a block. (Compile with
> `-Werror=declaration-after-statement` to detect this.)

Sorry about that. I'm compiling git on clang and it has a bug that
ignores this warning (it shows up on gcc). I'll watch out for it.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-03-30 18:25     ` Daniel Ferreira (theiostream)
@ 2017-04-01  9:03       ` Jeff King
  2017-04-01 17:16         ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Jeff King @ 2017-04-01  9:03 UTC (permalink / raw)
  To: Daniel Ferreira (theiostream)
  Cc: Michael Haggerty, Git Mailing List, Junio C Hamano,
	Stefan Beller, Duy Nguyen

On Thu, Mar 30, 2017 at 03:25:43PM -0300, Daniel Ferreira (theiostream) wrote:

> On Thu, Mar 30, 2017 at 4:46 AM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
> > Is there a special reason to write the date to the file as opposed to, say
> >
> >     touch dir/b
> >
> > ? (Some people use `: >dir/b` for this purpose, though I've never found
> > out why.) If you write the date to the file, the reader will be
> > distracted unnecessarily wondering whether the contents are important to
> > the test.
> >
> 
> There's no reason. They will be `touch`ed instead of written in a next version.
> 
> `:` is a bash builtin that that returns an exit code of zero and
> produces no output. On my Mac at least:
> 
> bash-3.2$ type :
> : is a shell builtin
> bash-3.2$ type touch
> touch is /usr/bin/touch
> 
> I suppose there are reasons to try to keep the most of a shell
> script's logic within the shell itself, without involving external
> binaries.

I think we actually prefer just:

  >dir/b

in our tests. The advantages over touch are:

  1. It is clear that the output will be empty afterwards (whereas with
     touch, it might just update the timestamp on an existing file).

  2. It's faster, since it doesn't require an extra process.

It's equivalent to ": >dir/b". I think you'll find all three forms in
our test suite, but ">dir/b" is the most common.

-Peff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API
  2017-04-01  9:03       ` Jeff King
@ 2017-04-01 17:16         ` Junio C Hamano
  0 siblings, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2017-04-01 17:16 UTC (permalink / raw)
  To: Jeff King
  Cc: Daniel Ferreira (theiostream),
	Michael Haggerty, Git Mailing List, Stefan Beller, Duy Nguyen

Jeff King <peff@peff.net> writes:

> I think we actually prefer just:
>
>   >dir/b
>
> in our tests. The advantages over touch are:
>
>   1. It is clear that the output will be empty afterwards (whereas with
>      touch, it might just update the timestamp on an existing file).
>
>   2. It's faster, since it doesn't require an extra process.
>
> It's equivalent to ": >dir/b". I think you'll find all three forms in
> our test suite, but ">dir/b" is the most common.

Another one.

    3. Avoiding "touch" when we only want to ensure existence of a
       file and sticking to ">dir/b" (with or without ":" no-op)
       lets us reserve "touch" for the intended use of the command.
       When seeing a "touch", the reader is signalled that the
       script cares about the timestamp of the target file updated.

Among the three, this one is the most important reason from the
point of view of making the codebase maintainable in the longer
term.  By forcing the writer to think when choosing between
">target" and "touch target", we reduce the mental load of the
readers.  As is true for most things in computer field, read happens
more often than write while maintaining the code, and that is why
this matters.


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-04-01 17:16 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-30  3:32 [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Daniel Ferreira
2017-03-30  3:32 ` [PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance Daniel Ferreira
2017-03-30  3:32 ` [PATCH v5 2/6] dir_iterator: refactor state machine model Daniel Ferreira
2017-03-30  8:18   ` Michael Haggerty
2017-03-30  3:32 ` [PATCH v5 3/6] dir_iterator: iterate over dir after its contents Daniel Ferreira
2017-03-30 11:03   ` Michael Haggerty
2017-03-30  3:32 ` [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API Daniel Ferreira
2017-03-30  7:46   ` Michael Haggerty
2017-03-30 18:25     ` Daniel Ferreira (theiostream)
2017-04-01  9:03       ` Jeff King
2017-04-01 17:16         ` Junio C Hamano
2017-03-30  7:48   ` Michael Haggerty
2017-03-30  8:05   ` Michael Haggerty
2017-03-30 18:26     ` Daniel Ferreira (theiostream)
2017-03-30  3:32 ` [PATCH v5 5/6] remove_subtree(): reimplement using iterators Daniel Ferreira
2017-03-30  3:32 ` [PATCH v5 6/6] remove_subtree(): test removing nested directories Daniel Ferreira
2017-03-30 11:07   ` Michael Haggerty
2017-03-30 11:27 ` [PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators Michael Haggerty
2017-03-30 12:10   ` Duy Nguyen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.