git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] submodule: implement `module_list` as a builtin helper
@ 2015-08-05  0:04 Stefan Beller
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-05  0:04 UTC (permalink / raw)
  To: gitster; +Cc: git, jens.lehmann, hvoigt, Stefan Beller

Most of the submodule operations work on a set of submodules.
Calculating and using this set is usually done via:

       module_list "$@" | {
           while read mode sha1 stage sm_path
           do
                # the actual operation
           done
       }

Currently the function `module_list` is implemented in the
git-submodule.sh as a shell script wrapping a perl script.
The rewrite is in C, such that it is faster and can later be
easily adapted when other functions are rewritten in C.

git-submodule.sh similar to the builtin commands will navigate
to the top most directory of the repository and keeping the
subdirectories as a variable. As the helper is called from
within the git-submodule.sh script, we are already navigated
to the root level, but the path arguments are stil relative
to the subdirectory we were in when calling git-submodule.sh.
That's why there is a `--prefix` option pointing to an alternative
path where to anchor relative path arguments.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

The same as yesterday evening, just an entry added to .gitignore.

So we'll have a "git submodule--helper module_list" here.

 .gitignore                  |   1 +
 Makefile                    |   1 +
 builtin.h                   |   1 +
 builtin/submodule--helper.c | 111 ++++++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh            |  54 +++------------------
 git.c                       |   1 +
 6 files changed, 121 insertions(+), 48 deletions(-)
 create mode 100644 builtin/submodule--helper.c

diff --git a/.gitignore b/.gitignore
index a685ec1..2a69ba0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,6 +155,7 @@
 /git-status
 /git-stripspace
 /git-submodule
+/git-submodule--helper
 /git-svn
 /git-symbolic-ref
 /git-tag
diff --git a/Makefile b/Makefile
index 7efedbe..460d17a 100644
--- a/Makefile
+++ b/Makefile
@@ -899,6 +899,7 @@ BUILTIN_OBJS += builtin/shortlog.o
 BUILTIN_OBJS += builtin/show-branch.o
 BUILTIN_OBJS += builtin/show-ref.o
 BUILTIN_OBJS += builtin/stripspace.o
+BUILTIN_OBJS += builtin/submodule--helper.o
 BUILTIN_OBJS += builtin/symbolic-ref.o
 BUILTIN_OBJS += builtin/tag.o
 BUILTIN_OBJS += builtin/unpack-file.o
diff --git a/builtin.h b/builtin.h
index 839483d..924e6c4 100644
--- a/builtin.h
+++ b/builtin.h
@@ -119,6 +119,7 @@ extern int cmd_show(int argc, const char **argv, const char *prefix);
 extern int cmd_show_branch(int argc, const char **argv, const char *prefix);
 extern int cmd_status(int argc, const char **argv, const char *prefix);
 extern int cmd_stripspace(int argc, const char **argv, const char *prefix);
+extern int cmd_submodule__helper(int argc, const char **argv, const char *prefix);
 extern int cmd_symbolic_ref(int argc, const char **argv, const char *prefix);
 extern int cmd_tag(int argc, const char **argv, const char *prefix);
 extern int cmd_tar_tree(int argc, const char **argv, const char *prefix);
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
new file mode 100644
index 0000000..cb18ddf
--- /dev/null
+++ b/builtin/submodule--helper.c
@@ -0,0 +1,111 @@
+#include "builtin.h"
+#include "cache.h"
+#include "parse-options.h"
+#include "quote.h"
+#include "pathspec.h"
+#include "dir.h"
+#include "utf8.h"
+
+static char *ps_matched;
+static const struct cache_entry **ce_entries;
+static int ce_alloc, ce_used;
+static struct pathspec pathspec;
+static const char *alternative_path;
+
+static void module_list_compute(int argc, const char **argv,
+				const char *prefix,
+				struct pathspec *pathspec)
+{
+	int i;
+	char *max_prefix;
+	int max_prefix_len;
+	parse_pathspec(pathspec, 0,
+		       PATHSPEC_PREFER_FULL |
+		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
+		       prefix, argv);
+
+	/* Find common prefix for all pathspec's */
+	max_prefix = common_prefix(pathspec);
+	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;
+
+	if (pathspec->nr)
+		ps_matched = xcalloc(1, pathspec->nr);
+
+
+	if (read_cache() < 0)
+		die("index file corrupt");
+
+	for (i = 0; i < active_nr; i++) {
+		const struct cache_entry *ce = active_cache[i];
+
+		if (!match_pathspec(pathspec, ce->name, ce_namelen(ce),
+				    max_prefix_len, ps_matched,
+				    S_ISGITLINK(ce->ce_mode) | S_ISDIR(ce->ce_mode)))
+			continue;
+
+		if (S_ISGITLINK(ce->ce_mode)) {
+			ALLOC_GROW(ce_entries, ce_used + 1, ce_alloc);
+			ce_entries[ce_used++] = ce;
+		}
+	}
+}
+
+static int module_list(int argc, const char **argv, const char *prefix)
+{
+	int i;
+	struct string_list already_printed = STRING_LIST_INIT_NODUP;
+
+	struct option module_list_options[] = {
+		OPT_STRING(0, "prefix", &alternative_path,
+			   N_("path"),
+			   N_("alternative anchor for relative paths")),
+		OPT_END()
+	};
+
+	static const char * const git_submodule_helper_usage[] = {
+		N_("git submodule--helper module_list [--prefix=<path>] [<path>...]"),
+		NULL
+	};
+
+	argc = parse_options(argc, argv, prefix, module_list_options,
+			     git_submodule_helper_usage, 0);
+
+	module_list_compute(argc, argv, alternative_path
+					? alternative_path
+					: prefix, &pathspec);
+
+	if (ps_matched && report_path_error(ps_matched, &pathspec, prefix)) {
+		printf("#unmatched\n");
+		return 1;
+	}
+
+	for (i = 0; i < ce_used; i++) {
+		const struct cache_entry *ce = ce_entries[i];
+
+		if (string_list_has_string(&already_printed, ce->name))
+			continue;
+
+		if (ce_stage(ce)) {
+			printf("%06o %s U\t", ce->ce_mode, sha1_to_hex(null_sha1));
+		} else {
+			printf("%06o %s %d\t", ce->ce_mode, sha1_to_hex(ce->sha1), ce_stage(ce));
+		}
+
+		utf8_fprintf(stdout, "%s\n", ce->name);
+
+		string_list_insert(&already_printed, ce->name);
+	}
+	return 0;
+}
+
+int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
+{
+	if (argc < 2)
+		goto usage;
+
+	if (!strcmp(argv[1], "module_list"))
+		return module_list(argc - 1, argv + 1, prefix);
+
+usage:
+	usage("git submodule--helper module_list\n");
+}
diff --git a/git-submodule.sh b/git-submodule.sh
index 36797c3..af9ecef 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -145,48 +145,6 @@ relative_path ()
 	echo "$result$target"
 }
 
-#
-# Get submodule info for registered submodules
-# $@ = path to limit submodule list
-#
-module_list()
-{
-	eval "set $(git rev-parse --sq --prefix "$wt_prefix" -- "$@")"
-	(
-		git ls-files -z --error-unmatch --stage -- "$@" ||
-		echo "unmatched pathspec exists"
-	) |
-	@@PERL@@ -e '
-	my %unmerged = ();
-	my ($null_sha1) = ("0" x 40);
-	my @out = ();
-	my $unmatched = 0;
-	$/ = "\0";
-	while (<STDIN>) {
-		if (/^unmatched pathspec/) {
-			$unmatched = 1;
-			next;
-		}
-		chomp;
-		my ($mode, $sha1, $stage, $path) =
-			/^([0-7]+) ([0-9a-f]{40}) ([0-3])\t(.*)$/;
-		next unless $mode eq "160000";
-		if ($stage ne "0") {
-			if (!$unmerged{$path}++) {
-				push @out, "$mode $null_sha1 U\t$path\n";
-			}
-			next;
-		}
-		push @out, "$_\n";
-	}
-	if ($unmatched) {
-		print "#unmatched\n";
-	} else {
-		print for (@out);
-	}
-	'
-}
-
 die_if_unmatched ()
 {
 	if test "$1" = "#unmatched"
@@ -532,7 +490,7 @@ cmd_foreach()
 	# command in the subshell (and a recursive call to this function)
 	exec 3<&0
 
-	module_list |
+	git submodule--helper module_list --prefix "$wt_prefix"|
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
@@ -592,7 +550,7 @@ cmd_init()
 		shift
 	done
 
-	module_list "$@" |
+	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
@@ -674,7 +632,7 @@ cmd_deinit()
 		die "$(eval_gettext "Use '.' if you really want to deinitialize all submodules")"
 	fi
 
-	module_list "$@" |
+	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
@@ -790,7 +748,7 @@ cmd_update()
 	fi
 
 	cloned_modules=
-	module_list "$@" | {
+	git submodule--helper module_list --prefix "$wt_prefix" "$@" | {
 	err=
 	while read mode sha1 stage sm_path
 	do
@@ -1222,7 +1180,7 @@ cmd_status()
 		shift
 	done
 
-	module_list "$@" |
+	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
@@ -1299,7 +1257,7 @@ cmd_sync()
 		esac
 	done
 	cd_to_toplevel
-	module_list "$@" |
+	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
diff --git a/git.c b/git.c
index 55c327c..deecba0 100644
--- a/git.c
+++ b/git.c
@@ -469,6 +469,7 @@ static struct cmd_struct commands[] = {
 	{ "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE },
 	{ "status", cmd_status, RUN_SETUP | NEED_WORK_TREE },
 	{ "stripspace", cmd_stripspace },
+	{ "submodule--helper", cmd_submodule__helper, RUN_SETUP },
 	{ "symbolic-ref", cmd_symbolic_ref, RUN_SETUP },
 	{ "tag", cmd_tag, RUN_SETUP },
 	{ "unpack-file", cmd_unpack_file, RUN_SETUP },
-- 
2.5.0.2.gbb9888f.dirty

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05  0:04 [PATCH 1/4] submodule: implement `module_list` as a builtin helper Stefan Beller
@ 2015-08-05  0:04 ` Stefan Beller
  2015-08-05  0:05   ` Stefan Beller
                     ` (2 more replies)
  2015-08-05 18:31 ` [PATCH 1/4] submodule: implement `module_list` " Jens Lehmann
  2015-08-07 19:53 ` Junio C Hamano
  2 siblings, 3 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-05  0:04 UTC (permalink / raw)
  To: gitster; +Cc: git, jens.lehmann, hvoigt, Stefan Beller

The goal of this series being rewriting `git submodule update`,
we don't want to call out to the shell script for config lookups.

So reimplement the lookup of the submodule name in C.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

 When I started to implement git submodule add in the helper, I realized
 the very first thing to be done would be module_name translated to C,
 so I did that separately. Maybe we need to split this up as well into two
 separate steps for processing and I/O, such that it can be reused better
 from a future "git submodule--helper update" function

 builtin/submodule--helper.c | 47 +++++++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh            | 32 +++++++-----------------------
 2 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index cb18ddf..dd5635f 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -5,6 +5,8 @@
 #include "pathspec.h"
 #include "dir.h"
 #include "utf8.h"
+#include "run-command.h"
+#include "string-list.h"
 
 static char *ps_matched;
 static const struct cache_entry **ce_entries;
@@ -98,6 +100,48 @@ static int module_list(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+
+static int collect_module_names(const char *key, const char *value, void *cb)
+{
+	size_t len;
+	struct string_list *sl = cb;
+
+	if (starts_with(key, "submodule.")
+	    && strip_suffix(key, ".path", &len)) {
+		struct strbuf sb = STRBUF_INIT;
+		strbuf_add(&sb, key + strlen("submodule."),
+				len - strlen("submodule."));
+		string_list_insert(sl, value)->util = strbuf_detach(&sb, NULL);
+		strbuf_release(&sb);
+	}
+
+	return 0;
+}
+
+static int module_name(int argc, const char **argv, const char *prefix)
+{
+	struct string_list_item *item;
+	struct git_config_source config_source;
+	struct string_list values = STRING_LIST_INIT_DUP;
+
+	if (!argc)
+		usage("git submodule--helper module_name <path>\n");
+
+	memset(&config_source, 0, sizeof(config_source));
+	config_source.file = ".gitmodules";
+
+	if (git_config_with_options(collect_module_names, &values,
+				    &config_source, 1) < 0)
+		die(_("unknown error occured while reading the git modules file"));
+
+	item = string_list_lookup(&values, argv[0]);
+	if (item)
+		printf("%s\n", (char*)item->util);
+	else
+		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
+	return 0;
+}
+
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 {
 	if (argc < 2)
@@ -106,6 +150,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 	if (!strcmp(argv[1], "module_list"))
 		return module_list(argc - 1, argv + 1, prefix);
 
+	if (!strcmp(argv[1], "module_name"))
+		return module_name(argc - 2, argv + 2, prefix);
+
 usage:
 	usage("git submodule--helper module_list\n");
 }
diff --git a/git-submodule.sh b/git-submodule.sh
index af9ecef..e6ff38d 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -178,24 +178,6 @@ get_submodule_config () {
 	printf '%s' "${value:-$default}"
 }
 
-
-#
-# Map submodule path to submodule name
-#
-# $1 = path
-#
-module_name()
-{
-	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
-	sm_path="$1"
-	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
-	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
-		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
-	test -z "$name" &&
-	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
-	printf '%s\n' "$name"
-}
-
 #
 # Clone a submodule
 #
@@ -498,7 +480,7 @@ cmd_foreach()
 		then
 			displaypath=$(relative_path "$sm_path")
 			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
-			name=$(module_name "$sm_path")
+			name=$(git submodule--helper module_name "$sm_path")
 			(
 				prefix="$prefix$sm_path/"
 				clear_local_git_env
@@ -554,7 +536,7 @@ cmd_init()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -636,7 +618,7 @@ cmd_deinit()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -758,7 +740,7 @@ cmd_update()
 			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
 			continue
 		fi
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		branch=$(get_submodule_config "$name" branch master)
 		if ! test -z "$update"
@@ -1022,7 +1004,7 @@ cmd_summary() {
 			# Respect the ignore setting for --for-status.
 			if test -n "$for_status"
 			then
-				name=$(module_name "$sm_path")
+				name=$(git submodule--helper module_name "$sm_path")
 				ignore_config=$(get_submodule_config "$name" ignore none)
 				test $status != A && test $ignore_config = all && continue
 			fi
@@ -1184,7 +1166,7 @@ cmd_status()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		displaypath=$(relative_path "$prefix$sm_path")
 		if test "$stage" = U
@@ -1261,7 +1243,7 @@ cmd_sync()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path")
+		name=$(git submodule--helper module_name "$sm_path")
 		url=$(git config -f .gitmodules --get submodule."$name".url)
 
 		# Possibly a url relative to parent
-- 
2.5.0.2.gbb9888f.dirty

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
@ 2015-08-05  0:05   ` Stefan Beller
  2015-08-05  0:58   ` Eric Sunshine
  2015-08-05 19:06   ` Jens Lehmann
  2 siblings, 0 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-05  0:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann, Heiko Voigt, Stefan Beller

The series consists of 2 patches only actually. The next patches have
not been sent as they are heavy WIP.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
  2015-08-05  0:05   ` Stefan Beller
@ 2015-08-05  0:58   ` Eric Sunshine
  2015-08-05 16:29     ` Stefan Beller
  2015-08-05 19:06   ` Jens Lehmann
  2 siblings, 1 reply; 27+ messages in thread
From: Eric Sunshine @ 2015-08-05  0:58 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, Git List, Jens Lehmann, Heiko Voigt

On Tue, Aug 4, 2015 at 8:04 PM, Stefan Beller <sbeller@google.com> wrote:
> The goal of this series being rewriting `git submodule update`,
> we don't want to call out to the shell script for config lookups.
>
> So reimplement the lookup of the submodule name in C.
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index cb18ddf..dd5635f 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -98,6 +100,48 @@ static int module_list(int argc, const char **argv, const char *prefix)
> +static int collect_module_names(const char *key, const char *value, void *cb)
> +{
> +       size_t len;
> +       struct string_list *sl = cb;
> +
> +       if (starts_with(key, "submodule.")
> +           && strip_suffix(key, ".path", &len)) {
> +               struct strbuf sb = STRBUF_INIT;
> +               strbuf_add(&sb, key + strlen("submodule."),
> +                               len - strlen("submodule."));
> +               string_list_insert(sl, value)->util = strbuf_detach(&sb, NULL);
> +               strbuf_release(&sb);

Why the complexity and overhead of a strbuf when the same could be
accomplished more easily and straightforwardly with xstrndup()?

> +       }
> +
> +       return 0;
> +}
> +
> +static int module_name(int argc, const char **argv, const char *prefix)
> +{
> +       struct string_list_item *item;
> +       struct git_config_source config_source;
> +       struct string_list values = STRING_LIST_INIT_DUP;
> +
> +       if (!argc)

Do you mean?

    if (argc != 1)

> +               usage("git submodule--helper module_name <path>\n");
> +
> +       memset(&config_source, 0, sizeof(config_source));
> +       config_source.file = ".gitmodules";
> +
> +       if (git_config_with_options(collect_module_names, &values,
> +                                   &config_source, 1) < 0)
> +               die(_("unknown error occured while reading the git modules file"));
> +
> +       item = string_list_lookup(&values, argv[0]);
> +       if (item)
> +               printf("%s\n", (char*)item->util);
> +       else
> +               die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
> +       return 0;
> +}
> +
>  int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>  {
>         if (argc < 2)
> @@ -106,6 +150,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>         if (!strcmp(argv[1], "module_list"))
>                 return module_list(argc - 1, argv + 1, prefix);
>
> +       if (!strcmp(argv[1], "module_name"))
> +               return module_name(argc - 2, argv + 2, prefix);
> +
>  usage:
>         usage("git submodule--helper module_list\n");
>  }

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05  0:58   ` Eric Sunshine
@ 2015-08-05 16:29     ` Stefan Beller
  0 siblings, 0 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-05 16:29 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Junio C Hamano, Git List, Jens Lehmann, Heiko Voigt

On Tue, Aug 4, 2015 at 5:58 PM, Eric Sunshine <sunshine@sunshineco.com> wrote:
> On Tue, Aug 4, 2015 at 8:04 PM, Stefan Beller <sbeller@google.com> wrote:
>> The goal of this series being rewriting `git submodule update`,
>> we don't want to call out to the shell script for config lookups.
>>
>> So reimplement the lookup of the submodule name in C.
>>
>> Signed-off-by: Stefan Beller <sbeller@google.com>
>> ---
>> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
>> index cb18ddf..dd5635f 100644
>> --- a/builtin/submodule--helper.c
>> +++ b/builtin/submodule--helper.c
>> @@ -98,6 +100,48 @@ static int module_list(int argc, const char **argv, const char *prefix)
>> +static int collect_module_names(const char *key, const char *value, void *cb)
>> +{
>> +       size_t len;
>> +       struct string_list *sl = cb;
>> +
>> +       if (starts_with(key, "submodule.")
>> +           && strip_suffix(key, ".path", &len)) {
>> +               struct strbuf sb = STRBUF_INIT;
>> +               strbuf_add(&sb, key + strlen("submodule."),
>> +                               len - strlen("submodule."));
>> +               string_list_insert(sl, value)->util = strbuf_detach(&sb, NULL);
>> +               strbuf_release(&sb);
>
> Why the complexity and overhead of a strbuf when the same could be
> accomplished more easily and straightforwardly with xstrndup()?

fixed.

>
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int module_name(int argc, const char **argv, const char *prefix)
>> +{
>> +       struct string_list_item *item;
>> +       struct git_config_source config_source;
>> +       struct string_list values = STRING_LIST_INIT_DUP;
>> +
>> +       if (!argc)
>
> Do you mean?
>
>     if (argc != 1)

doh! Yes I meant that.

>
>> +               usage("git submodule--helper module_name <path>\n");
>> +
>> +       memset(&config_source, 0, sizeof(config_source));
>> +       config_source.file = ".gitmodules";
>> +
>> +       if (git_config_with_options(collect_module_names, &values,
>> +                                   &config_source, 1) < 0)
>> +               die(_("unknown error occured while reading the git modules file"));
>> +
>> +       item = string_list_lookup(&values, argv[0]);
>> +       if (item)
>> +               printf("%s\n", (char*)item->util);
>> +       else
>> +               die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
>> +       return 0;
>> +}
>> +
>>  int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>>  {
>>         if (argc < 2)
>> @@ -106,6 +150,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>>         if (!strcmp(argv[1], "module_list"))
>>                 return module_list(argc - 1, argv + 1, prefix);
>>
>> +       if (!strcmp(argv[1], "module_name"))
>> +               return module_name(argc - 2, argv + 2, prefix);
>> +
>>  usage:
>>         usage("git submodule--helper module_list\n");
>>  }

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/4] submodule: implement `module_list` as a builtin helper
  2015-08-05  0:04 [PATCH 1/4] submodule: implement `module_list` as a builtin helper Stefan Beller
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
@ 2015-08-05 18:31 ` Jens Lehmann
  2015-08-07 19:53 ` Junio C Hamano
  2 siblings, 0 replies; 27+ messages in thread
From: Jens Lehmann @ 2015-08-05 18:31 UTC (permalink / raw)
  To: Stefan Beller, gitster; +Cc: git, hvoigt

Am 05.08.2015 um 02:04 schrieb Stefan Beller:
> Most of the submodule operations work on a set of submodules.
> Calculating and using this set is usually done via:
>
>         module_list "$@" | {
>             while read mode sha1 stage sm_path
>             do
>                  # the actual operation
>             done
>         }
>
> Currently the function `module_list` is implemented in the
> git-submodule.sh as a shell script wrapping a perl script.
> The rewrite is in C, such that it is faster and can later be
> easily adapted when other functions are rewritten in C.
>
> git-submodule.sh similar to the builtin commands will navigate
> to the top most directory of the repository and keeping the
> subdirectories as a variable. As the helper is called from
> within the git-submodule.sh script, we are already navigated
> to the root level, but the path arguments are stil relative
> to the subdirectory we were in when calling git-submodule.sh.
> That's why there is a `--prefix` option pointing to an alternative
> path where to anchor relative path arguments.

Great you are working on this! I'll try to help, but you might
see some latency as my Git time budget is currently very limited.

I think this patch is definitely going into the right direction.
The whole test suite runs 3 seconds faster for me with this
applied: best of three is 3:16 without and 3:13 with this patch.
That's quite an improvement, especially as only parts of the test
suite deal with submodules! (And I expect Windows users to profit
even more, considered how expensive forking is there)

Acked-by: Jens Lehmann <Jens.Lehmann@web.de>

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>
> The same as yesterday evening, just an entry added to .gitignore.
>
> So we'll have a "git submodule--helper module_list" here.
>
>   .gitignore                  |   1 +
>   Makefile                    |   1 +
>   builtin.h                   |   1 +
>   builtin/submodule--helper.c | 111 ++++++++++++++++++++++++++++++++++++++++++++
>   git-submodule.sh            |  54 +++------------------
>   git.c                       |   1 +
>   6 files changed, 121 insertions(+), 48 deletions(-)
>   create mode 100644 builtin/submodule--helper.c
>
> diff --git a/.gitignore b/.gitignore
> index a685ec1..2a69ba0 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -155,6 +155,7 @@
>   /git-status
>   /git-stripspace
>   /git-submodule
> +/git-submodule--helper
>   /git-svn
>   /git-symbolic-ref
>   /git-tag
> diff --git a/Makefile b/Makefile
> index 7efedbe..460d17a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -899,6 +899,7 @@ BUILTIN_OBJS += builtin/shortlog.o
>   BUILTIN_OBJS += builtin/show-branch.o
>   BUILTIN_OBJS += builtin/show-ref.o
>   BUILTIN_OBJS += builtin/stripspace.o
> +BUILTIN_OBJS += builtin/submodule--helper.o
>   BUILTIN_OBJS += builtin/symbolic-ref.o
>   BUILTIN_OBJS += builtin/tag.o
>   BUILTIN_OBJS += builtin/unpack-file.o
> diff --git a/builtin.h b/builtin.h
> index 839483d..924e6c4 100644
> --- a/builtin.h
> +++ b/builtin.h
> @@ -119,6 +119,7 @@ extern int cmd_show(int argc, const char **argv, const char *prefix);
>   extern int cmd_show_branch(int argc, const char **argv, const char *prefix);
>   extern int cmd_status(int argc, const char **argv, const char *prefix);
>   extern int cmd_stripspace(int argc, const char **argv, const char *prefix);
> +extern int cmd_submodule__helper(int argc, const char **argv, const char *prefix);
>   extern int cmd_symbolic_ref(int argc, const char **argv, const char *prefix);
>   extern int cmd_tag(int argc, const char **argv, const char *prefix);
>   extern int cmd_tar_tree(int argc, const char **argv, const char *prefix);
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> new file mode 100644
> index 0000000..cb18ddf
> --- /dev/null
> +++ b/builtin/submodule--helper.c
> @@ -0,0 +1,111 @@
> +#include "builtin.h"
> +#include "cache.h"
> +#include "parse-options.h"
> +#include "quote.h"
> +#include "pathspec.h"
> +#include "dir.h"
> +#include "utf8.h"
> +
> +static char *ps_matched;
> +static const struct cache_entry **ce_entries;
> +static int ce_alloc, ce_used;
> +static struct pathspec pathspec;
> +static const char *alternative_path;
> +
> +static void module_list_compute(int argc, const char **argv,
> +				const char *prefix,
> +				struct pathspec *pathspec)
> +{
> +	int i;
> +	char *max_prefix;
> +	int max_prefix_len;
> +	parse_pathspec(pathspec, 0,
> +		       PATHSPEC_PREFER_FULL |
> +		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
> +		       prefix, argv);
> +
> +	/* Find common prefix for all pathspec's */
> +	max_prefix = common_prefix(pathspec);
> +	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;
> +
> +	if (pathspec->nr)
> +		ps_matched = xcalloc(1, pathspec->nr);
> +
> +
> +	if (read_cache() < 0)
> +		die("index file corrupt");
> +
> +	for (i = 0; i < active_nr; i++) {
> +		const struct cache_entry *ce = active_cache[i];
> +
> +		if (!match_pathspec(pathspec, ce->name, ce_namelen(ce),
> +				    max_prefix_len, ps_matched,
> +				    S_ISGITLINK(ce->ce_mode) | S_ISDIR(ce->ce_mode)))
> +			continue;
> +
> +		if (S_ISGITLINK(ce->ce_mode)) {
> +			ALLOC_GROW(ce_entries, ce_used + 1, ce_alloc);
> +			ce_entries[ce_used++] = ce;
> +		}
> +	}
> +}
> +
> +static int module_list(int argc, const char **argv, const char *prefix)
> +{
> +	int i;
> +	struct string_list already_printed = STRING_LIST_INIT_NODUP;
> +
> +	struct option module_list_options[] = {
> +		OPT_STRING(0, "prefix", &alternative_path,
> +			   N_("path"),
> +			   N_("alternative anchor for relative paths")),
> +		OPT_END()
> +	};
> +
> +	static const char * const git_submodule_helper_usage[] = {
> +		N_("git submodule--helper module_list [--prefix=<path>] [<path>...]"),
> +		NULL
> +	};
> +
> +	argc = parse_options(argc, argv, prefix, module_list_options,
> +			     git_submodule_helper_usage, 0);
> +
> +	module_list_compute(argc, argv, alternative_path
> +					? alternative_path
> +					: prefix, &pathspec);
> +
> +	if (ps_matched && report_path_error(ps_matched, &pathspec, prefix)) {
> +		printf("#unmatched\n");
> +		return 1;
> +	}
> +
> +	for (i = 0; i < ce_used; i++) {
> +		const struct cache_entry *ce = ce_entries[i];
> +
> +		if (string_list_has_string(&already_printed, ce->name))
> +			continue;
> +
> +		if (ce_stage(ce)) {
> +			printf("%06o %s U\t", ce->ce_mode, sha1_to_hex(null_sha1));
> +		} else {
> +			printf("%06o %s %d\t", ce->ce_mode, sha1_to_hex(ce->sha1), ce_stage(ce));
> +		}
> +
> +		utf8_fprintf(stdout, "%s\n", ce->name);
> +
> +		string_list_insert(&already_printed, ce->name);
> +	}
> +	return 0;
> +}
> +
> +int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
> +{
> +	if (argc < 2)
> +		goto usage;
> +
> +	if (!strcmp(argv[1], "module_list"))
> +		return module_list(argc - 1, argv + 1, prefix);
> +
> +usage:
> +	usage("git submodule--helper module_list\n");
> +}
> diff --git a/git-submodule.sh b/git-submodule.sh
> index 36797c3..af9ecef 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -145,48 +145,6 @@ relative_path ()
>   	echo "$result$target"
>   }
>
> -#
> -# Get submodule info for registered submodules
> -# $@ = path to limit submodule list
> -#
> -module_list()
> -{
> -	eval "set $(git rev-parse --sq --prefix "$wt_prefix" -- "$@")"
> -	(
> -		git ls-files -z --error-unmatch --stage -- "$@" ||
> -		echo "unmatched pathspec exists"
> -	) |
> -	@@PERL@@ -e '
> -	my %unmerged = ();
> -	my ($null_sha1) = ("0" x 40);
> -	my @out = ();
> -	my $unmatched = 0;
> -	$/ = "\0";
> -	while (<STDIN>) {
> -		if (/^unmatched pathspec/) {
> -			$unmatched = 1;
> -			next;
> -		}
> -		chomp;
> -		my ($mode, $sha1, $stage, $path) =
> -			/^([0-7]+) ([0-9a-f]{40}) ([0-3])\t(.*)$/;
> -		next unless $mode eq "160000";
> -		if ($stage ne "0") {
> -			if (!$unmerged{$path}++) {
> -				push @out, "$mode $null_sha1 U\t$path\n";
> -			}
> -			next;
> -		}
> -		push @out, "$_\n";
> -	}
> -	if ($unmatched) {
> -		print "#unmatched\n";
> -	} else {
> -		print for (@out);
> -	}
> -	'
> -}
> -
>   die_if_unmatched ()
>   {
>   	if test "$1" = "#unmatched"
> @@ -532,7 +490,7 @@ cmd_foreach()
>   	# command in the subshell (and a recursive call to this function)
>   	exec 3<&0
>
> -	module_list |
> +	git submodule--helper module_list --prefix "$wt_prefix"|
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> @@ -592,7 +550,7 @@ cmd_init()
>   		shift
>   	done
>
> -	module_list "$@" |
> +	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> @@ -674,7 +632,7 @@ cmd_deinit()
>   		die "$(eval_gettext "Use '.' if you really want to deinitialize all submodules")"
>   	fi
>
> -	module_list "$@" |
> +	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> @@ -790,7 +748,7 @@ cmd_update()
>   	fi
>
>   	cloned_modules=
> -	module_list "$@" | {
> +	git submodule--helper module_list --prefix "$wt_prefix" "$@" | {
>   	err=
>   	while read mode sha1 stage sm_path
>   	do
> @@ -1222,7 +1180,7 @@ cmd_status()
>   		shift
>   	done
>
> -	module_list "$@" |
> +	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> @@ -1299,7 +1257,7 @@ cmd_sync()
>   		esac
>   	done
>   	cd_to_toplevel
> -	module_list "$@" |
> +	git submodule--helper module_list --prefix "$wt_prefix" "$@" |
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> diff --git a/git.c b/git.c
> index 55c327c..deecba0 100644
> --- a/git.c
> +++ b/git.c
> @@ -469,6 +469,7 @@ static struct cmd_struct commands[] = {
>   	{ "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE },
>   	{ "status", cmd_status, RUN_SETUP | NEED_WORK_TREE },
>   	{ "stripspace", cmd_stripspace },
> +	{ "submodule--helper", cmd_submodule__helper, RUN_SETUP },
>   	{ "symbolic-ref", cmd_symbolic_ref, RUN_SETUP },
>   	{ "tag", cmd_tag, RUN_SETUP },
>   	{ "unpack-file", cmd_unpack_file, RUN_SETUP },
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
  2015-08-05  0:05   ` Stefan Beller
  2015-08-05  0:58   ` Eric Sunshine
@ 2015-08-05 19:06   ` Jens Lehmann
  2015-08-05 19:55     ` Stefan Beller
  2 siblings, 1 reply; 27+ messages in thread
From: Jens Lehmann @ 2015-08-05 19:06 UTC (permalink / raw)
  To: Stefan Beller, gitster; +Cc: git, hvoigt

Am 05.08.2015 um 02:04 schrieb Stefan Beller:
> The goal of this series being rewriting `git submodule update`,
> we don't want to call out to the shell script for config lookups.
>
> So reimplement the lookup of the submodule name in C.

Cool. This brings down the duration of the test suite from 3:13
to 3:12 for me (best of three).

You might wanna have a look into submodule.c: after initially
calling gitmodules_config() one can lookup the submodule name
in the static "config_name_for_path" string_list. If you'd add
a public method to submodule.c which accesses that string_list
and returns the name for the given path, you won't need your
two new functions ... or am I missing something?

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>
>   When I started to implement git submodule add in the helper, I realized
>   the very first thing to be done would be module_name translated to C,
>   so I did that separately. Maybe we need to split this up as well into two
>   separate steps for processing and I/O, such that it can be reused better
>   from a future "git submodule--helper update" function
>
>   builtin/submodule--helper.c | 47 +++++++++++++++++++++++++++++++++++++++++++++
>   git-submodule.sh            | 32 +++++++-----------------------
>   2 files changed, 54 insertions(+), 25 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index cb18ddf..dd5635f 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -5,6 +5,8 @@
>   #include "pathspec.h"
>   #include "dir.h"
>   #include "utf8.h"
> +#include "run-command.h"
> +#include "string-list.h"
>
>   static char *ps_matched;
>   static const struct cache_entry **ce_entries;
> @@ -98,6 +100,48 @@ static int module_list(int argc, const char **argv, const char *prefix)
>   	return 0;
>   }
>
> +
> +static int collect_module_names(const char *key, const char *value, void *cb)
> +{
> +	size_t len;
> +	struct string_list *sl = cb;
> +
> +	if (starts_with(key, "submodule.")
> +	    && strip_suffix(key, ".path", &len)) {
> +		struct strbuf sb = STRBUF_INIT;
> +		strbuf_add(&sb, key + strlen("submodule."),
> +				len - strlen("submodule."));
> +		string_list_insert(sl, value)->util = strbuf_detach(&sb, NULL);
> +		strbuf_release(&sb);
> +	}
> +
> +	return 0;
> +}
> +
> +static int module_name(int argc, const char **argv, const char *prefix)
> +{
> +	struct string_list_item *item;
> +	struct git_config_source config_source;
> +	struct string_list values = STRING_LIST_INIT_DUP;
> +
> +	if (!argc)
> +		usage("git submodule--helper module_name <path>\n");
> +
> +	memset(&config_source, 0, sizeof(config_source));
> +	config_source.file = ".gitmodules";
> +
> +	if (git_config_with_options(collect_module_names, &values,
> +				    &config_source, 1) < 0)
> +		die(_("unknown error occured while reading the git modules file"));
> +
> +	item = string_list_lookup(&values, argv[0]);
> +	if (item)
> +		printf("%s\n", (char*)item->util);
> +	else
> +		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
> +	return 0;
> +}
> +
>   int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>   {
>   	if (argc < 2)
> @@ -106,6 +150,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>   	if (!strcmp(argv[1], "module_list"))
>   		return module_list(argc - 1, argv + 1, prefix);
>
> +	if (!strcmp(argv[1], "module_name"))
> +		return module_name(argc - 2, argv + 2, prefix);
> +
>   usage:
>   	usage("git submodule--helper module_list\n");
>   }
> diff --git a/git-submodule.sh b/git-submodule.sh
> index af9ecef..e6ff38d 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -178,24 +178,6 @@ get_submodule_config () {
>   	printf '%s' "${value:-$default}"
>   }
>
> -
> -#
> -# Map submodule path to submodule name
> -#
> -# $1 = path
> -#
> -module_name()
> -{
> -	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
> -	sm_path="$1"
> -	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
> -	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
> -		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
> -	test -z "$name" &&
> -	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
> -	printf '%s\n' "$name"
> -}
> -
>   #
>   # Clone a submodule
>   #
> @@ -498,7 +480,7 @@ cmd_foreach()
>   		then
>   			displaypath=$(relative_path "$sm_path")
>   			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
> -			name=$(module_name "$sm_path")
> +			name=$(git submodule--helper module_name "$sm_path")
>   			(
>   				prefix="$prefix$sm_path/"
>   				clear_local_git_env
> @@ -554,7 +536,7 @@ cmd_init()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>
>   		displaypath=$(relative_path "$sm_path")
>
> @@ -636,7 +618,7 @@ cmd_deinit()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>
>   		displaypath=$(relative_path "$sm_path")
>
> @@ -758,7 +740,7 @@ cmd_update()
>   			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
>   			continue
>   		fi
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>   		url=$(git config submodule."$name".url)
>   		branch=$(get_submodule_config "$name" branch master)
>   		if ! test -z "$update"
> @@ -1022,7 +1004,7 @@ cmd_summary() {
>   			# Respect the ignore setting for --for-status.
>   			if test -n "$for_status"
>   			then
> -				name=$(module_name "$sm_path")
> +				name=$(git submodule--helper module_name "$sm_path")
>   				ignore_config=$(get_submodule_config "$name" ignore none)
>   				test $status != A && test $ignore_config = all && continue
>   			fi
> @@ -1184,7 +1166,7 @@ cmd_status()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>   		url=$(git config submodule."$name".url)
>   		displaypath=$(relative_path "$prefix$sm_path")
>   		if test "$stage" = U
> @@ -1261,7 +1243,7 @@ cmd_sync()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path")
> +		name=$(git submodule--helper module_name "$sm_path")
>   		url=$(git config -f .gitmodules --get submodule."$name".url)
>
>   		# Possibly a url relative to parent
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/4] submodule: implement `module_name` as a builtin helper
  2015-08-05 19:06   ` Jens Lehmann
@ 2015-08-05 19:55     ` Stefan Beller
  2015-08-05 21:08       ` [PATCH] " Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-05 19:55 UTC (permalink / raw)
  To: Jens Lehmann; +Cc: Junio C Hamano, git, Heiko Voigt

On Wed, Aug 5, 2015 at 12:06 PM, Jens Lehmann <Jens.Lehmann@web.de> wrote:
> Am 05.08.2015 um 02:04 schrieb Stefan Beller:
>>
>> The goal of this series being rewriting `git submodule update`,
>> we don't want to call out to the shell script for config lookups.
>>
>> So reimplement the lookup of the submodule name in C.
>
>
> Cool. This brings down the duration of the test suite from 3:13
> to 3:12 for me (best of three).
>
> You might wanna have a look into submodule.c: after initially
> calling gitmodules_config() one can lookup the submodule name
> in the static "config_name_for_path" string_list. If you'd add
> a public method to submodule.c which accesses that string_list
> and returns the name for the given path, you won't need your
> two new functions ... or am I missing something?

Yes I just realized there is already lots of submodule related code
written in C, so I wanted to look at that as the next step and see what
can be reused and maybe redo the patches reusing code.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-05 19:55     ` Stefan Beller
@ 2015-08-05 21:08       ` Stefan Beller
  2015-08-06 19:49         ` Jens Lehmann
  0 siblings, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-05 21:08 UTC (permalink / raw)
  To: gitster; +Cc: Jens.Lehmann, hvoigt, git, Stefan Beller

This implements the helper `module_name` in C instead of shell,
yielding a nice performance boost.

Before this patch, I measured a time (best out of three):

  $ time ./t7400-submodule-basic.sh  >/dev/null
    real	0m11.066s
    user	0m3.348s
    sys	0m8.534s

With this patch applied I measured (also best out of three)

  $ time ./t7400-submodule-basic.sh  >/dev/null
    real	0m10.063s
    user	0m3.044s
    sys	0m7.487s

Signed-off-by: Stefan Beller <sbeller@google.com>
---

Is this what you had in mind, Jens?

Jonathan pointed me to https://github.com/jlehmann/git-submod-enhancements/wiki
Does it reflect reality (i.e. as time passes code changes)?

I also noticed that you have made quite some changes to submodules on different
branches which are not upstream. Soem changes look familiar (as in "I believe
this is upstream alreaday?" while others look new and exciting to me).
I could not quite get the order yet, though.

 builtin/submodule--helper.c | 23 +++++++++++++++++++++++
 git-submodule.sh            | 32 +++++++-------------------------
 submodule.c                 | 18 +++++++++++++-----
 submodule.h                 |  1 +
 4 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index cb18ddf..3713c4c 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -5,6 +5,8 @@
 #include "pathspec.h"
 #include "dir.h"
 #include "utf8.h"
+#include "submodule.h"
+#include "string-list.h"
 
 static char *ps_matched;
 static const struct cache_entry **ce_entries;
@@ -98,6 +100,24 @@ static int module_list(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+static int module_name(int argc, const char **argv, const char *prefix)
+{
+	const char *name;
+
+	if (argc != 1)
+		usage("git submodule--helper module_name <path>\n");
+
+	gitmodules_config();
+	name = submodule_name_for_path(argv[0]);
+
+	if (name)
+		printf("%s\n", name);
+	else
+		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
+
+	return 0;
+}
+
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 {
 	if (argc < 2)
@@ -106,6 +126,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 	if (!strcmp(argv[1], "module_list"))
 		return module_list(argc - 1, argv + 1, prefix);
 
+	if (!strcmp(argv[1], "module_name"))
+		return module_name(argc - 2, argv + 2, prefix);
+
 usage:
 	usage("git submodule--helper module_list\n");
 }
diff --git a/git-submodule.sh b/git-submodule.sh
index af9ecef..e6ff38d 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -178,24 +178,6 @@ get_submodule_config () {
 	printf '%s' "${value:-$default}"
 }
 
-
-#
-# Map submodule path to submodule name
-#
-# $1 = path
-#
-module_name()
-{
-	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
-	sm_path="$1"
-	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
-	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
-		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
-	test -z "$name" &&
-	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
-	printf '%s\n' "$name"
-}
-
 #
 # Clone a submodule
 #
@@ -498,7 +480,7 @@ cmd_foreach()
 		then
 			displaypath=$(relative_path "$sm_path")
 			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
-			name=$(module_name "$sm_path")
+			name=$(git submodule--helper module_name "$sm_path")
 			(
 				prefix="$prefix$sm_path/"
 				clear_local_git_env
@@ -554,7 +536,7 @@ cmd_init()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -636,7 +618,7 @@ cmd_deinit()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -758,7 +740,7 @@ cmd_update()
 			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
 			continue
 		fi
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		branch=$(get_submodule_config "$name" branch master)
 		if ! test -z "$update"
@@ -1022,7 +1004,7 @@ cmd_summary() {
 			# Respect the ignore setting for --for-status.
 			if test -n "$for_status"
 			then
-				name=$(module_name "$sm_path")
+				name=$(git submodule--helper module_name "$sm_path")
 				ignore_config=$(get_submodule_config "$name" ignore none)
 				test $status != A && test $ignore_config = all && continue
 			fi
@@ -1184,7 +1166,7 @@ cmd_status()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		displaypath=$(relative_path "$prefix$sm_path")
 		if test "$stage" = U
@@ -1261,7 +1243,7 @@ cmd_sync()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path")
+		name=$(git submodule--helper module_name "$sm_path")
 		url=$(git config -f .gitmodules --get submodule."$name".url)
 
 		# Possibly a url relative to parent
diff --git a/submodule.c b/submodule.c
index 700bbf4..dd8dcb0 100644
--- a/submodule.c
+++ b/submodule.c
@@ -686,6 +686,16 @@ static void calculate_changed_submodule_paths(void)
 	initialized_fetch_ref_tips = 0;
 }
 
+const char* submodule_name_for_path(const char* path)
+{
+	struct string_list_item *item;
+	item = unsorted_string_list_lookup(&config_name_for_path, path);
+	if (item)
+		return item->util;
+	else
+		return NULL;
+}
+
 int fetch_populated_submodules(const struct argv_array *options,
 			       const char *prefix, int command_line_option,
 			       int quiet)
@@ -693,7 +703,7 @@ int fetch_populated_submodules(const struct argv_array *options,
 	int i, result = 0;
 	struct child_process cp = CHILD_PROCESS_INIT;
 	struct argv_array argv = ARGV_ARRAY_INIT;
-	struct string_list_item *name_for_path;
+	const char *name_for_path;
 	const char *work_tree = get_git_work_tree();
 	if (!work_tree)
 		goto out;
@@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
 		if (!S_ISGITLINK(ce->ce_mode))
 			continue;
 
-		name = ce->name;
-		name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
-		if (name_for_path)
-			name = name_for_path->util;
+		name_for_path = submodule_name_for_path(ce->name);
+		name =  name_for_path ? name_for_path : ce->name;
 
 		default_argv = "yes";
 		if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
diff --git a/submodule.h b/submodule.h
index 7beec48..e3dd854 100644
--- a/submodule.h
+++ b/submodule.h
@@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
 		struct string_list *needs_pushing);
 int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
 void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
+const char* submodule_name_for_path(const char* path);
 
 #endif
-- 
2.5.0.236.g32a3769

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-05 21:08       ` [PATCH] " Stefan Beller
@ 2015-08-06 19:49         ` Jens Lehmann
  2015-08-06 21:38           ` Stefan Beller
  2015-08-07 20:17           ` Junio C Hamano
  0 siblings, 2 replies; 27+ messages in thread
From: Jens Lehmann @ 2015-08-06 19:49 UTC (permalink / raw)
  To: Stefan Beller, gitster; +Cc: hvoigt, git

Am 05.08.2015 um 23:08 schrieb Stefan Beller:
> This implements the helper `module_name` in C instead of shell,
> yielding a nice performance boost.
>
> Before this patch, I measured a time (best out of three):
>
>    $ time ./t7400-submodule-basic.sh  >/dev/null
>      real	0m11.066s
>      user	0m3.348s
>      sys	0m8.534s
>
> With this patch applied I measured (also best out of three)
>
>    $ time ./t7400-submodule-basic.sh  >/dev/null
>      real	0m10.063s
>      user	0m3.044s
>      sys	0m7.487s
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>
> Is this what you had in mind, Jens?

Yup, thanks!

Just some small nits, please see below (and also in the fixup! commit
I pushed on the "submodule--helper" branch in my Github repo).

> Jonathan pointed me to https://github.com/jlehmann/git-submod-enhancements/wiki
> Does it reflect reality (i.e. as time passes code changes)?
 >
> I also noticed that you have made quite some changes to submodules on different
> branches which are not upstream. Soem changes look familiar (as in "I believe
> this is upstream alreaday?" while others look new and exciting to me).
> I could not quite get the order yet, though.

I think the Wiki should be pretty much up to date, but I'll try to check
that and the state of the branches on the weekend to see if it needs an
update. If you see some branches you believe are already upstream, it'd
be great if you could mention them so I can double check.

>   builtin/submodule--helper.c | 23 +++++++++++++++++++++++
>   git-submodule.sh            | 32 +++++++-------------------------
>   submodule.c                 | 18 +++++++++++++-----
>   submodule.h                 |  1 +
>   4 files changed, 44 insertions(+), 30 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index cb18ddf..3713c4c 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -5,6 +5,8 @@
>   #include "pathspec.h"
>   #include "dir.h"
>   #include "utf8.h"
> +#include "submodule.h"
> +#include "string-list.h"
>
>   static char *ps_matched;
>   static const struct cache_entry **ce_entries;
> @@ -98,6 +100,24 @@ static int module_list(int argc, const char **argv, const char *prefix)
>   	return 0;
>   }
>
> +static int module_name(int argc, const char **argv, const char *prefix)
> +{
> +	const char *name;
> +
> +	if (argc != 1)
> +		usage("git submodule--helper module_name <path>\n");
> +
> +	gitmodules_config();
> +	name = submodule_name_for_path(argv[0]);
> +
> +	if (name)
> +		printf("%s\n", name);
> +	else
> +		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);

Hmm, I prefer the pattern to bail out inside if() and continue with the
expected case without else:

+	if (!name)
+		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
+
+	printf("%s\n", name);

But maybe that's just me.

> +	return 0;
> +}
> +
>   int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>   {
>   	if (argc < 2)
> @@ -106,6 +126,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>   	if (!strcmp(argv[1], "module_list"))
>   		return module_list(argc - 1, argv + 1, prefix);
>
> +	if (!strcmp(argv[1], "module_name"))
> +		return module_name(argc - 2, argv + 2, prefix);
> +
>   usage:
>   	usage("git submodule--helper module_list\n");
>   }
> diff --git a/git-submodule.sh b/git-submodule.sh
> index af9ecef..e6ff38d 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -178,24 +178,6 @@ get_submodule_config () {
>   	printf '%s' "${value:-$default}"
>   }
>
> -
> -#
> -# Map submodule path to submodule name
> -#
> -# $1 = path
> -#
> -module_name()
> -{
> -	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
> -	sm_path="$1"
> -	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
> -	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
> -		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
> -	test -z "$name" &&
> -	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
> -	printf '%s\n' "$name"
> -}
> -
>   #
>   # Clone a submodule
>   #
> @@ -498,7 +480,7 @@ cmd_foreach()
>   		then
>   			displaypath=$(relative_path "$sm_path")
>   			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
> -			name=$(module_name "$sm_path")
> +			name=$(git submodule--helper module_name "$sm_path")
>   			(
>   				prefix="$prefix$sm_path/"
>   				clear_local_git_env
> @@ -554,7 +536,7 @@ cmd_init()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>
>   		displaypath=$(relative_path "$sm_path")
>
> @@ -636,7 +618,7 @@ cmd_deinit()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>
>   		displaypath=$(relative_path "$sm_path")
>
> @@ -758,7 +740,7 @@ cmd_update()
>   			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
>   			continue
>   		fi
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>   		url=$(git config submodule."$name".url)
>   		branch=$(get_submodule_config "$name" branch master)
>   		if ! test -z "$update"
> @@ -1022,7 +1004,7 @@ cmd_summary() {
>   			# Respect the ignore setting for --for-status.
>   			if test -n "$for_status"
>   			then
> -				name=$(module_name "$sm_path")
> +				name=$(git submodule--helper module_name "$sm_path")
>   				ignore_config=$(get_submodule_config "$name" ignore none)
>   				test $status != A && test $ignore_config = all && continue
>   			fi
> @@ -1184,7 +1166,7 @@ cmd_status()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>   		url=$(git config submodule."$name".url)
>   		displaypath=$(relative_path "$prefix$sm_path")
>   		if test "$stage" = U
> @@ -1261,7 +1243,7 @@ cmd_sync()
>   	while read mode sha1 stage sm_path
>   	do
>   		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path")
> +		name=$(git submodule--helper module_name "$sm_path")
>   		url=$(git config -f .gitmodules --get submodule."$name".url)
>
>   		# Possibly a url relative to parent
> diff --git a/submodule.c b/submodule.c
> index 700bbf4..dd8dcb0 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -686,6 +686,16 @@ static void calculate_changed_submodule_paths(void)
>   	initialized_fetch_ref_tips = 0;
>   }
>
> +const char* submodule_name_for_path(const char* path)

Asterisk belongs to the name, not the type:

+const char *submodule_name_for_path(const char *path)

> +{
> +	struct string_list_item *item;
> +	item = unsorted_string_list_lookup(&config_name_for_path, path);
> +	if (item)
> +		return item->util;
> +	else
> +		return NULL;

Maybe we could use the "if() bailout;" pattern here too:

+	if (!item)
+		return NULL;
+
+	return item->util;

("else" after "return" looks strange ;-)

> +}
> +
>   int fetch_populated_submodules(const struct argv_array *options,
>   			       const char *prefix, int command_line_option,
>   			       int quiet)
> @@ -693,7 +703,7 @@ int fetch_populated_submodules(const struct argv_array *options,
>   	int i, result = 0;
>   	struct child_process cp = CHILD_PROCESS_INIT;
>   	struct argv_array argv = ARGV_ARRAY_INIT;
> -	struct string_list_item *name_for_path;
> +	const char *name_for_path;
>   	const char *work_tree = get_git_work_tree();
>   	if (!work_tree)
>   		goto out;
> @@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
>   		if (!S_ISGITLINK(ce->ce_mode))
>   			continue;
>
> -		name = ce->name;
> -		name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
> -		if (name_for_path)
> -			name = name_for_path->util;
> +		name_for_path = submodule_name_for_path(ce->name);
> +		name =  name_for_path ? name_for_path : ce->name;

I think we can get rid of name_for_path auto variable altogether here.
(And while at it why not add the previously missing comment why we do
fall back to the path here?):

+		name = submodule_name_for_path(ce->name);
+		if (!name)
+			/* Not in .gitmodules, try the default name == path */
+			name = ce->name;

>   		default_argv = "yes";
>   		if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
> diff --git a/submodule.h b/submodule.h
> index 7beec48..e3dd854 100644
> --- a/submodule.h
> +++ b/submodule.h
> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>   		struct string_list *needs_pushing);
>   int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>   void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
> +const char* submodule_name_for_path(const char* path);

Asterisk belongs to the names, not the type here too.

+const char *submodule_name_for_path(const char *path);

>
>   #endif
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-06 19:49         ` Jens Lehmann
@ 2015-08-06 21:38           ` Stefan Beller
  2015-08-07 20:03             ` Junio C Hamano
  2015-08-07 20:17           ` Junio C Hamano
  1 sibling, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-06 21:38 UTC (permalink / raw)
  To: gitster; +Cc: Stefan Beller, Jens.Lehmann, git, hvoigt

This implements the helper `module_name` in C instead of shell,
yielding a nice performance boost.

Before this patch, I measured a time (best out of three):

  $ time ./t7400-submodule-basic.sh  >/dev/null
    real	0m11.066s
    user	0m3.348s
    sys	0m8.534s

With this patch applied I measured (also best out of three)

  $ time ./t7400-submodule-basic.sh  >/dev/null
    real	0m10.063s
    user	0m3.044s
    sys	0m7.487s

Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Stefan Beller <sbeller@google.com>
---

This incorporates the changes from Jens fixup! commit
(which addresses all issues he pointed out).

I agree this looks much cleaner. :)

This patch advances origin/sb/submodule-helper (d2c6c09ac8,
submodule: implement `module_list` as a builtin helper)

 builtin/submodule--helper.c | 22 ++++++++++++++++++++++
 git-submodule.sh            | 32 +++++++-------------------------
 submodule.c                 | 19 ++++++++++++++-----
 submodule.h                 |  1 +
 4 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index cb18ddf..bc37b74 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -5,6 +5,8 @@
 #include "pathspec.h"
 #include "dir.h"
 #include "utf8.h"
+#include "submodule.h"
+#include "string-list.h"
 
 static char *ps_matched;
 static const struct cache_entry **ce_entries;
@@ -98,6 +100,23 @@ static int module_list(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+static int module_name(int argc, const char **argv, const char *prefix)
+{
+	const char *name;
+
+	if (argc != 1)
+		usage("git submodule--helper module_name <path>\n");
+
+	gitmodules_config();
+	name = submodule_name_for_path(argv[0]);
+
+	if (!name)
+		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
+
+	printf("%s\n", name);
+	return 0;
+}
+
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 {
 	if (argc < 2)
@@ -106,6 +125,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
 	if (!strcmp(argv[1], "module_list"))
 		return module_list(argc - 1, argv + 1, prefix);
 
+	if (!strcmp(argv[1], "module_name"))
+		return module_name(argc - 2, argv + 2, prefix);
+
 usage:
 	usage("git submodule--helper module_list\n");
 }
diff --git a/git-submodule.sh b/git-submodule.sh
index af9ecef..e6ff38d 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -178,24 +178,6 @@ get_submodule_config () {
 	printf '%s' "${value:-$default}"
 }
 
-
-#
-# Map submodule path to submodule name
-#
-# $1 = path
-#
-module_name()
-{
-	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
-	sm_path="$1"
-	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
-	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
-		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
-	test -z "$name" &&
-	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
-	printf '%s\n' "$name"
-}
-
 #
 # Clone a submodule
 #
@@ -498,7 +480,7 @@ cmd_foreach()
 		then
 			displaypath=$(relative_path "$sm_path")
 			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
-			name=$(module_name "$sm_path")
+			name=$(git submodule--helper module_name "$sm_path")
 			(
 				prefix="$prefix$sm_path/"
 				clear_local_git_env
@@ -554,7 +536,7 @@ cmd_init()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -636,7 +618,7 @@ cmd_deinit()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 
 		displaypath=$(relative_path "$sm_path")
 
@@ -758,7 +740,7 @@ cmd_update()
 			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
 			continue
 		fi
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		branch=$(get_submodule_config "$name" branch master)
 		if ! test -z "$update"
@@ -1022,7 +1004,7 @@ cmd_summary() {
 			# Respect the ignore setting for --for-status.
 			if test -n "$for_status"
 			then
-				name=$(module_name "$sm_path")
+				name=$(git submodule--helper module_name "$sm_path")
 				ignore_config=$(get_submodule_config "$name" ignore none)
 				test $status != A && test $ignore_config = all && continue
 			fi
@@ -1184,7 +1166,7 @@ cmd_status()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path") || exit
+		name=$(git submodule--helper module_name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		displaypath=$(relative_path "$prefix$sm_path")
 		if test "$stage" = U
@@ -1261,7 +1243,7 @@ cmd_sync()
 	while read mode sha1 stage sm_path
 	do
 		die_if_unmatched "$mode"
-		name=$(module_name "$sm_path")
+		name=$(git submodule--helper module_name "$sm_path")
 		url=$(git config -f .gitmodules --get submodule."$name".url)
 
 		# Possibly a url relative to parent
diff --git a/submodule.c b/submodule.c
index 15e90d1..78d7616 100644
--- a/submodule.c
+++ b/submodule.c
@@ -686,6 +686,16 @@ static void calculate_changed_submodule_paths(void)
 	initialized_fetch_ref_tips = 0;
 }
 
+const char *submodule_name_for_path(const char *path)
+{
+	struct string_list_item *item;
+	item = unsorted_string_list_lookup(&config_name_for_path, path);
+	if (!item)
+		return NULL;
+
+	return item->util;
+}
+
 int fetch_populated_submodules(const struct argv_array *options,
 			       const char *prefix, int command_line_option,
 			       int quiet)
@@ -693,7 +703,6 @@ int fetch_populated_submodules(const struct argv_array *options,
 	int i, result = 0;
 	struct child_process cp = CHILD_PROCESS_INIT;
 	struct argv_array argv = ARGV_ARRAY_INIT;
-	struct string_list_item *name_for_path;
 	const char *work_tree = get_git_work_tree();
 	if (!work_tree)
 		goto out;
@@ -723,10 +732,10 @@ int fetch_populated_submodules(const struct argv_array *options,
 		if (!S_ISGITLINK(ce->ce_mode))
 			continue;
 
-		name = ce->name;
-		name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
-		if (name_for_path)
-			name = name_for_path->util;
+		name = submodule_name_for_path(ce->name);
+		if (!name)
+			/* Not in .gitmodules, try the default name == path */
+			name = ce->name;
 
 		default_argv = "yes";
 		if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
diff --git a/submodule.h b/submodule.h
index 7beec48..fc7f8a6 100644
--- a/submodule.h
+++ b/submodule.h
@@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
 		struct string_list *needs_pushing);
 int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
 void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
+const char *submodule_name_for_path(const char *path);
 
 #endif
-- 
2.5.0.239.g9728e1d.dirty

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/4] submodule: implement `module_list` as a builtin helper
  2015-08-05  0:04 [PATCH 1/4] submodule: implement `module_list` as a builtin helper Stefan Beller
  2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
  2015-08-05 18:31 ` [PATCH 1/4] submodule: implement `module_list` " Jens Lehmann
@ 2015-08-07 19:53 ` Junio C Hamano
  2 siblings, 0 replies; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 19:53 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, jens.lehmann, hvoigt

Stefan Beller <sbeller@google.com> writes:

> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> new file mode 100644
> index 0000000..cb18ddf
> --- /dev/null
> +++ b/builtin/submodule--helper.c
> @@ -0,0 +1,111 @@
> + ...
> +static char *ps_matched;
> +static const struct cache_entry **ce_entries;
> +static int ce_alloc, ce_used;
> +static struct pathspec pathspec;
> +static const char *alternative_path;

These are OK for now to be global variables, but in the longer run,
I think you would need to introduce a struct or two that group the
relevant pieces and passed around the callchain.  For example, a
caller calling into module_list_compute() would want to pass a
pointer to a struct that has ce_entries[] and ps_matched to receive
the result.  pathspec and alternative_path would want to be a
function-scope auto variable in module_list, I would think.

> +static void module_list_compute(int argc, const char **argv,
> +				const char *prefix,
> +				struct pathspec *pathspec)
> +{
> +	int i;
> +	char *max_prefix;
> +	int max_prefix_len;
> +	parse_pathspec(pathspec, 0,
> +		       PATHSPEC_PREFER_FULL |
> +		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
> +		       prefix, argv);
> +
> +	/* Find common prefix for all pathspec's */
> +	max_prefix = common_prefix(pathspec);
> +	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;
> +
> +	if (pathspec->nr)
> +		ps_matched = xcalloc(1, pathspec->nr);
> +
> +
> +	if (read_cache() < 0)
> +		die("index file corrupt");

Again, this is OK for now, but I suspect you would eventually want
to return an error and have the caller react to it.

> +static int module_list(int argc, const char **argv, const char *prefix)
> +{
> +...
> +	for (i = 0; i < ce_used; i++) {
> +		const struct cache_entry *ce = ce_entries[i];
> +
> +		if (string_list_has_string(&already_printed, ce->name))
> +			continue;
> +
> +		if (ce_stage(ce)) {
> +			printf("%06o %s U\t", ce->ce_mode, sha1_to_hex(null_sha1));
> +		} else {
> +			printf("%06o %s %d\t", ce->ce_mode, sha1_to_hex(ce->sha1), ce_stage(ce));
> +		}
> +
> +		utf8_fprintf(stdout, "%s\n", ce->name);
> +
> +		string_list_insert(&already_printed, ce->name);

This looks a wasteful use of string-list.

When we iterate over the in-core index, or a subset obtained from
the in-core index without reordering, the standard technique to
handle entries for the same path only once is to handle one (while
remembering what it is) and then skip the ones that follow with the
same name, with a loop like this:

	i = 0;
        while (i < ce_used) {
        	ce = ce_entries[i++];
		use that ce;
                while (i < ce_used && !strcmp(ce->ce_name, ce_entries[i]->ce_name))
			i++; /* skip entries with the same name */
	}

Take advantage of the fact that the entries are still sorted.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-06 21:38           ` Stefan Beller
@ 2015-08-07 20:03             ` Junio C Hamano
  2015-08-07 20:54               ` Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 20:03 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens.Lehmann, git, hvoigt

Stefan Beller <sbeller@google.com> writes:

> This incorporates the changes from Jens fixup! commit
> (which addresses all issues he pointed out).
>
> I agree this looks much cleaner. :)

The only thing I found somewhat questionable is where to call
gitmodules_config() from.  I think it is OK to do this at the
beginning of module_name(), at least for now, simply because the
other function module_list() does not need to.

When you rewrite sufficiently large parts of the scripted Porcelain
into C so that different pieces translated from the shell functions
directly call into each other, that may have to change, though.  I
do not think gitmodules_config() is designed to be called more than
once, and I expect module_name() would be called many times inside a
loop.

Other than that, this is a trivial refactoring (i.e. a new helper
function added to submodule.c can be used from an existing
open-coded logic in the same file, and then the same helper function
gains a new callsite in submodule--helper.c) that makes things
easier to read.

Thanks.

>  builtin/submodule--helper.c | 22 ++++++++++++++++++++++
>  git-submodule.sh            | 32 +++++++-------------------------
>  submodule.c                 | 19 ++++++++++++++-----
>  submodule.h                 |  1 +
>  4 files changed, 44 insertions(+), 30 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index cb18ddf..bc37b74 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -5,6 +5,8 @@
>  #include "pathspec.h"
>  #include "dir.h"
>  #include "utf8.h"
> +#include "submodule.h"
> +#include "string-list.h"
>  
>  static char *ps_matched;
>  static const struct cache_entry **ce_entries;
> @@ -98,6 +100,23 @@ static int module_list(int argc, const char **argv, const char *prefix)
>  	return 0;
>  }
>  
> +static int module_name(int argc, const char **argv, const char *prefix)
> +{
> +	const char *name;
> +
> +	if (argc != 1)
> +		usage("git submodule--helper module_name <path>\n");
> +
> +	gitmodules_config();
> +	name = submodule_name_for_path(argv[0]);
> +
> +	if (!name)
> +		die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
> +
> +	printf("%s\n", name);
> +	return 0;
> +}
> +
>  int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>  {
>  	if (argc < 2)
> @@ -106,6 +125,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>  	if (!strcmp(argv[1], "module_list"))
>  		return module_list(argc - 1, argv + 1, prefix);
>  
> +	if (!strcmp(argv[1], "module_name"))
> +		return module_name(argc - 2, argv + 2, prefix);
> +
>  usage:
>  	usage("git submodule--helper module_list\n");
>  }
> diff --git a/git-submodule.sh b/git-submodule.sh
> index af9ecef..e6ff38d 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -178,24 +178,6 @@ get_submodule_config () {
>  	printf '%s' "${value:-$default}"
>  }
>  
> -
> -#
> -# Map submodule path to submodule name
> -#
> -# $1 = path
> -#
> -module_name()
> -{
> -	# Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
> -	sm_path="$1"
> -	re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
> -	name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
> -		sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
> -	test -z "$name" &&
> -	die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
> -	printf '%s\n' "$name"
> -}
> -
>  #
>  # Clone a submodule
>  #
> @@ -498,7 +480,7 @@ cmd_foreach()
>  		then
>  			displaypath=$(relative_path "$sm_path")
>  			say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
> -			name=$(module_name "$sm_path")
> +			name=$(git submodule--helper module_name "$sm_path")
>  			(
>  				prefix="$prefix$sm_path/"
>  				clear_local_git_env
> @@ -554,7 +536,7 @@ cmd_init()
>  	while read mode sha1 stage sm_path
>  	do
>  		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>  
>  		displaypath=$(relative_path "$sm_path")
>  
> @@ -636,7 +618,7 @@ cmd_deinit()
>  	while read mode sha1 stage sm_path
>  	do
>  		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>  
>  		displaypath=$(relative_path "$sm_path")
>  
> @@ -758,7 +740,7 @@ cmd_update()
>  			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
>  			continue
>  		fi
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>  		url=$(git config submodule."$name".url)
>  		branch=$(get_submodule_config "$name" branch master)
>  		if ! test -z "$update"
> @@ -1022,7 +1004,7 @@ cmd_summary() {
>  			# Respect the ignore setting for --for-status.
>  			if test -n "$for_status"
>  			then
> -				name=$(module_name "$sm_path")
> +				name=$(git submodule--helper module_name "$sm_path")
>  				ignore_config=$(get_submodule_config "$name" ignore none)
>  				test $status != A && test $ignore_config = all && continue
>  			fi
> @@ -1184,7 +1166,7 @@ cmd_status()
>  	while read mode sha1 stage sm_path
>  	do
>  		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path") || exit
> +		name=$(git submodule--helper module_name "$sm_path") || exit
>  		url=$(git config submodule."$name".url)
>  		displaypath=$(relative_path "$prefix$sm_path")
>  		if test "$stage" = U
> @@ -1261,7 +1243,7 @@ cmd_sync()
>  	while read mode sha1 stage sm_path
>  	do
>  		die_if_unmatched "$mode"
> -		name=$(module_name "$sm_path")
> +		name=$(git submodule--helper module_name "$sm_path")
>  		url=$(git config -f .gitmodules --get submodule."$name".url)
>  
>  		# Possibly a url relative to parent
> diff --git a/submodule.c b/submodule.c
> index 15e90d1..78d7616 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -686,6 +686,16 @@ static void calculate_changed_submodule_paths(void)
>  	initialized_fetch_ref_tips = 0;
>  }
>  
> +const char *submodule_name_for_path(const char *path)
> +{
> +	struct string_list_item *item;
> +	item = unsorted_string_list_lookup(&config_name_for_path, path);
> +	if (!item)
> +		return NULL;
> +
> +	return item->util;
> +}
> +
>  int fetch_populated_submodules(const struct argv_array *options,
>  			       const char *prefix, int command_line_option,
>  			       int quiet)
> @@ -693,7 +703,6 @@ int fetch_populated_submodules(const struct argv_array *options,
>  	int i, result = 0;
>  	struct child_process cp = CHILD_PROCESS_INIT;
>  	struct argv_array argv = ARGV_ARRAY_INIT;
> -	struct string_list_item *name_for_path;
>  	const char *work_tree = get_git_work_tree();
>  	if (!work_tree)
>  		goto out;
> @@ -723,10 +732,10 @@ int fetch_populated_submodules(const struct argv_array *options,
>  		if (!S_ISGITLINK(ce->ce_mode))
>  			continue;
>  
> -		name = ce->name;
> -		name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
> -		if (name_for_path)
> -			name = name_for_path->util;
> +		name = submodule_name_for_path(ce->name);
> +		if (!name)
> +			/* Not in .gitmodules, try the default name == path */
> +			name = ce->name;
>  
>  		default_argv = "yes";
>  		if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
> diff --git a/submodule.h b/submodule.h
> index 7beec48..fc7f8a6 100644
> --- a/submodule.h
> +++ b/submodule.h
> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>  		struct string_list *needs_pushing);
>  int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>  void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
> +const char *submodule_name_for_path(const char *path);
>  
>  #endif

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-06 19:49         ` Jens Lehmann
  2015-08-06 21:38           ` Stefan Beller
@ 2015-08-07 20:17           ` Junio C Hamano
  2015-08-07 20:49             ` Stefan Beller
  1 sibling, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 20:17 UTC (permalink / raw)
  To: Jens Lehmann, hvoigt; +Cc: Stefan Beller, git

Jens Lehmann <Jens.Lehmann@web.de> writes:

This change...

>> @@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
>>   		if (!S_ISGITLINK(ce->ce_mode))
>>   			continue;
>>
>> -		name = ce->name;
>> -		name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
>> -		if (name_for_path)
>> -			name = name_for_path->util;
>> +		name_for_path = submodule_name_for_path(ce->name);
>> +		name =  name_for_path ? name_for_path : ce->name;

... interacts with Heiko's cached submodule config work that seems
to have stalled.

I can discard the stalled topic and queue this one instead, asking
Heiko to reroll his on top once this has stabilized, or if Stefan is
really into revamping submodule now (which I hope is the case),
perhaps Heiko's work can be rerolled by Stefan (with help from
others, of course) as a prerequisite and then these changes can be
built on top of it?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 20:17           ` Junio C Hamano
@ 2015-08-07 20:49             ` Stefan Beller
  2015-08-07 21:14               ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 20:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 1:17 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Jens Lehmann <Jens.Lehmann@web.de> writes:
>
> This change...
>
>>> @@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
>>>              if (!S_ISGITLINK(ce->ce_mode))
>>>                      continue;
>>>
>>> -            name = ce->name;
>>> -            name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
>>> -            if (name_for_path)
>>> -                    name = name_for_path->util;
>>> +            name_for_path = submodule_name_for_path(ce->name);
>>> +            name =  name_for_path ? name_for_path : ce->name;
>
> ... interacts with Heiko's cached submodule config work that seems
> to have stalled.

We can drop that hunk as it only uses the new method
`submodule_name_for_path` but doesn't change functionality.
So if you want to keep Heikos work, I'll just resend the patch
without that hunk.

>
> I can discard the stalled topic and queue this one instead, asking
> Heiko to reroll his on top once this has stabilized, or if Stefan is
> really into revamping submodule now (which I hope is the case),
> perhaps Heiko's work can be rerolled by Stefan (with help from
> others, of course) as a prerequisite and then these changes can be
> built on top of it?

I am a bit overwhelmed as I am into git submodule for a few days now
and still have not a full&good understanding of the details.
So for now my plan looks like this:

1) rewrite the helpers in C (module_list, module_name and module_clone)
   These helper functions are useful for themselves as they speed up
   git submodule operations, but I have them on the plan as they are a
   pre requisite for rewriting `git submodule update`

2) Come up with a good thread pool abstraction
   (Started as "[RFC/PATCH 0/4] parallel fetch for submodules" )
   This abstraction (if done right) will allow us to use it in different places
   easily. I started it as part of "git fetch --recurse-submodules" because
   it is submodule related and reasonably sized

3) Rewrite `git submodule update` in C
  This will start out as a literal translation and once 2 is coming
along nicely,
  I want to add that thread pool capability to the C rewrite.

This plan still is huge to me, but a lot smaller than my initial
"rewrite the whole
git-submodule.sh in C".

Once the rewrite and parallelism is done, I may want to start adding new
features to the submodules, such as different "classes". Currently I think
each submodule would have a set of these classes or features, such that
you could clone a superproject with hints on the classes:

  git clone <url-to-super-project> --submodule-flags=basis,amiga-specific,ui

each of the flags would then include a specifc set of submodules, such
that after cloning the super project you have everything you need to build
the program with ui on amiga.






>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 20:03             ` Junio C Hamano
@ 2015-08-07 20:54               ` Stefan Beller
  0 siblings, 0 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 20:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, git, Heiko Voigt

On Fri, Aug 7, 2015 at 1:03 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> This incorporates the changes from Jens fixup! commit
>> (which addresses all issues he pointed out).
>>
>> I agree this looks much cleaner. :)
>
> The only thing I found somewhat questionable is where to call
> gitmodules_config() from.  I think it is OK to do this at the
> beginning of module_name(), at least for now, simply because the
> other function module_list() does not need to.
>
> When you rewrite sufficiently large parts of the scripted Porcelain
> into C so that different pieces translated from the shell functions
> directly call into each other, that may have to change, though.  I
> do not think gitmodules_config() is designed to be called more than
> once, and I expect module_name() would be called many times inside a
> loop.

I want to structure the each part rewritten as a reusable core part
and a wrapper which just sets up the environment (option parsing,
reading the index, configs).

In this patch you only see the latter part, the wrapper, because the
core part only consists of one line

    name = submodule_name_for_path(<input>);

so I did not want to wrap that into its own function. But when rewriting
piece of shell code, which originally called into module_name, I'd rather
use the one liner instead. (This doesn't quite follow the literal translation
strategy, but I think it may be appropriate here).

>
> Other than that, this is a trivial refactoring (i.e. a new helper
> function added to submodule.c can be used from an existing
> open-coded logic in the same file, and then the same helper function
> gains a new callsite in submodule--helper.c) that makes things
> easier to read.
>
> Thanks.
>
>>  builtin/submodule--helper.c | 22 ++++++++++++++++++++++
>>  git-submodule.sh            | 32 +++++++-------------------------
>>  submodule.c                 | 19 ++++++++++++++-----
>>  submodule.h                 |  1 +
>>  4 files changed, 44 insertions(+), 30 deletions(-)
>>
>> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
>> index cb18ddf..bc37b74 100644
>> --- a/builtin/submodule--helper.c
>> +++ b/builtin/submodule--helper.c
>> @@ -5,6 +5,8 @@
>>  #include "pathspec.h"
>>  #include "dir.h"
>>  #include "utf8.h"
>> +#include "submodule.h"
>> +#include "string-list.h"
>>
>>  static char *ps_matched;
>>  static const struct cache_entry **ce_entries;
>> @@ -98,6 +100,23 @@ static int module_list(int argc, const char **argv, const char *prefix)
>>       return 0;
>>  }
>>
>> +static int module_name(int argc, const char **argv, const char *prefix)
>> +{
>> +     const char *name;
>> +
>> +     if (argc != 1)
>> +             usage("git submodule--helper module_name <path>\n");
>> +
>> +     gitmodules_config();
>> +     name = submodule_name_for_path(argv[0]);
>> +
>> +     if (!name)
>> +             die("No submodule mapping found in .gitmodules for path '%s'", argv[0]);
>> +
>> +     printf("%s\n", name);
>> +     return 0;
>> +}
>> +
>>  int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>>  {
>>       if (argc < 2)
>> @@ -106,6 +125,9 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
>>       if (!strcmp(argv[1], "module_list"))
>>               return module_list(argc - 1, argv + 1, prefix);
>>
>> +     if (!strcmp(argv[1], "module_name"))
>> +             return module_name(argc - 2, argv + 2, prefix);
>> +
>>  usage:
>>       usage("git submodule--helper module_list\n");
>>  }
>> diff --git a/git-submodule.sh b/git-submodule.sh
>> index af9ecef..e6ff38d 100755
>> --- a/git-submodule.sh
>> +++ b/git-submodule.sh
>> @@ -178,24 +178,6 @@ get_submodule_config () {
>>       printf '%s' "${value:-$default}"
>>  }
>>
>> -
>> -#
>> -# Map submodule path to submodule name
>> -#
>> -# $1 = path
>> -#
>> -module_name()
>> -{
>> -     # Do we have "submodule.<something>.path = $1" defined in .gitmodules file?
>> -     sm_path="$1"
>> -     re=$(printf '%s\n' "$1" | sed -e 's/[].[^$\\*]/\\&/g')
>> -     name=$( git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
>> -             sed -n -e 's|^submodule\.\(.*\)\.path '"$re"'$|\1|p' )
>> -     test -z "$name" &&
>> -     die "$(eval_gettext "No submodule mapping found in .gitmodules for path '\$sm_path'")"
>> -     printf '%s\n' "$name"
>> -}
>> -
>>  #
>>  # Clone a submodule
>>  #
>> @@ -498,7 +480,7 @@ cmd_foreach()
>>               then
>>                       displaypath=$(relative_path "$sm_path")
>>                       say "$(eval_gettext "Entering '\$prefix\$displaypath'")"
>> -                     name=$(module_name "$sm_path")
>> +                     name=$(git submodule--helper module_name "$sm_path")
>>                       (
>>                               prefix="$prefix$sm_path/"
>>                               clear_local_git_env
>> @@ -554,7 +536,7 @@ cmd_init()
>>       while read mode sha1 stage sm_path
>>       do
>>               die_if_unmatched "$mode"
>> -             name=$(module_name "$sm_path") || exit
>> +             name=$(git submodule--helper module_name "$sm_path") || exit
>>
>>               displaypath=$(relative_path "$sm_path")
>>
>> @@ -636,7 +618,7 @@ cmd_deinit()
>>       while read mode sha1 stage sm_path
>>       do
>>               die_if_unmatched "$mode"
>> -             name=$(module_name "$sm_path") || exit
>> +             name=$(git submodule--helper module_name "$sm_path") || exit
>>
>>               displaypath=$(relative_path "$sm_path")
>>
>> @@ -758,7 +740,7 @@ cmd_update()
>>                       echo >&2 "Skipping unmerged submodule $prefix$sm_path"
>>                       continue
>>               fi
>> -             name=$(module_name "$sm_path") || exit
>> +             name=$(git submodule--helper module_name "$sm_path") || exit
>>               url=$(git config submodule."$name".url)
>>               branch=$(get_submodule_config "$name" branch master)
>>               if ! test -z "$update"
>> @@ -1022,7 +1004,7 @@ cmd_summary() {
>>                       # Respect the ignore setting for --for-status.
>>                       if test -n "$for_status"
>>                       then
>> -                             name=$(module_name "$sm_path")
>> +                             name=$(git submodule--helper module_name "$sm_path")
>>                               ignore_config=$(get_submodule_config "$name" ignore none)
>>                               test $status != A && test $ignore_config = all && continue
>>                       fi
>> @@ -1184,7 +1166,7 @@ cmd_status()
>>       while read mode sha1 stage sm_path
>>       do
>>               die_if_unmatched "$mode"
>> -             name=$(module_name "$sm_path") || exit
>> +             name=$(git submodule--helper module_name "$sm_path") || exit
>>               url=$(git config submodule."$name".url)
>>               displaypath=$(relative_path "$prefix$sm_path")
>>               if test "$stage" = U
>> @@ -1261,7 +1243,7 @@ cmd_sync()
>>       while read mode sha1 stage sm_path
>>       do
>>               die_if_unmatched "$mode"
>> -             name=$(module_name "$sm_path")
>> +             name=$(git submodule--helper module_name "$sm_path")
>>               url=$(git config -f .gitmodules --get submodule."$name".url)
>>
>>               # Possibly a url relative to parent
>> diff --git a/submodule.c b/submodule.c
>> index 15e90d1..78d7616 100644
>> --- a/submodule.c
>> +++ b/submodule.c
>> @@ -686,6 +686,16 @@ static void calculate_changed_submodule_paths(void)
>>       initialized_fetch_ref_tips = 0;
>>  }
>>
>> +const char *submodule_name_for_path(const char *path)
>> +{
>> +     struct string_list_item *item;
>> +     item = unsorted_string_list_lookup(&config_name_for_path, path);
>> +     if (!item)
>> +             return NULL;
>> +
>> +     return item->util;
>> +}
>> +
>>  int fetch_populated_submodules(const struct argv_array *options,
>>                              const char *prefix, int command_line_option,
>>                              int quiet)
>> @@ -693,7 +703,6 @@ int fetch_populated_submodules(const struct argv_array *options,
>>       int i, result = 0;
>>       struct child_process cp = CHILD_PROCESS_INIT;
>>       struct argv_array argv = ARGV_ARRAY_INIT;
>> -     struct string_list_item *name_for_path;
>>       const char *work_tree = get_git_work_tree();
>>       if (!work_tree)
>>               goto out;
>> @@ -723,10 +732,10 @@ int fetch_populated_submodules(const struct argv_array *options,
>>               if (!S_ISGITLINK(ce->ce_mode))
>>                       continue;
>>
>> -             name = ce->name;
>> -             name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
>> -             if (name_for_path)
>> -                     name = name_for_path->util;
>> +             name = submodule_name_for_path(ce->name);
>> +             if (!name)
>> +                     /* Not in .gitmodules, try the default name == path */
>> +                     name = ce->name;
>>
>>               default_argv = "yes";
>>               if (command_line_option == RECURSE_SUBMODULES_DEFAULT) {
>> diff --git a/submodule.h b/submodule.h
>> index 7beec48..fc7f8a6 100644
>> --- a/submodule.h
>> +++ b/submodule.h
>> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>>               struct string_list *needs_pushing);
>>  int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>>  void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
>> +const char *submodule_name_for_path(const char *path);
>>
>>  #endif

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 20:49             ` Stefan Beller
@ 2015-08-07 21:14               ` Junio C Hamano
  2015-08-07 21:21                 ` Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 21:14 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Stefan Beller <sbeller@google.com> writes:

> On Fri, Aug 7, 2015 at 1:17 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Jens Lehmann <Jens.Lehmann@web.de> writes:
>>
>> This change...
>>
>>>> @@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
>>>>              if (!S_ISGITLINK(ce->ce_mode))
>>>>                      continue;
>>>>
>>>> -            name = ce->name;
>>>> -            name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
>>>> -            if (name_for_path)
>>>> -                    name = name_for_path->util;
>>>> +            name_for_path = submodule_name_for_path(ce->name);
>>>> +            name =  name_for_path ? name_for_path : ce->name;
>>
>> ... interacts with Heiko's cached submodule config work that seems
>> to have stalled.
>
> We can drop that hunk as it only uses the new method
> `submodule_name_for_path` but doesn't change functionality.
> So if you want to keep Heikos work, I'll just resend the patch
> without that hunk.

Does such a result even make sense?  Note that I wasn't talking
about textual conflict.

If we followed what you just said, that patch will try to directly
read the data in config_name_for_path string list, which is removed
by Heiko's series, if I am reading it right.

In the new world order with Heiko's series, the way you grab
submodule configuration data is to call submodule_from_path() or
submodule_from_name() and they allow you to read from .gitmodules
either in a commit (for historical state), the index, or from the
working tree.  Which should be much cleaner and goes in the right
direction in the longer term.

And the best part of the story is that your module_name would be
just calling submodule_from_path() and peeking into a field.

> 2) Come up with a good thread pool abstraction
>    (Started as "[RFC/PATCH 0/4] parallel fetch for submodules" )
>    This abstraction (if done right) will allow us to use it in different places
>    easily. I started it as part of "git fetch --recurse-submodules" because
>    it is submodule related and reasonably sized

I personally think this gives the most bang-for-buck.  Write that
and expose it as "git submodule for-each-parallel", which takes the
shell scriptlet that currently is the loop body of "while read mode
sha1 stage sm_path" in update and clone.  You will have immediate
and large payback.

And you do not even need module_list and module_name written in C in
order to do so.  Not that these two are not trivial to implement, but
the payoff from them is not as large as from for-each-parallel ;-)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 21:14               ` Junio C Hamano
@ 2015-08-07 21:21                 ` Stefan Beller
  2015-08-07 21:32                   ` Junio C Hamano
  2015-08-07 22:42                   ` Junio C Hamano
  0 siblings, 2 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 21:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 2:14 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> On Fri, Aug 7, 2015 at 1:17 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>> Jens Lehmann <Jens.Lehmann@web.de> writes:
>>>
>>> This change...
>>>
>>>>> @@ -723,10 +733,8 @@ int fetch_populated_submodules(const struct argv_array *options,
>>>>>              if (!S_ISGITLINK(ce->ce_mode))
>>>>>                      continue;
>>>>>
>>>>> -            name = ce->name;
>>>>> -            name_for_path = unsorted_string_list_lookup(&config_name_for_path, ce->name);
>>>>> -            if (name_for_path)
>>>>> -                    name = name_for_path->util;
>>>>> +            name_for_path = submodule_name_for_path(ce->name);
>>>>> +            name =  name_for_path ? name_for_path : ce->name;
>>>
>>> ... interacts with Heiko's cached submodule config work that seems
>>> to have stalled.
>>
>> We can drop that hunk as it only uses the new method
>> `submodule_name_for_path` but doesn't change functionality.
>> So if you want to keep Heikos work, I'll just resend the patch
>> without that hunk.
>
> Does such a result even make sense?  Note that I wasn't talking
> about textual conflict.
>
> If we followed what you just said, that patch will try to directly
> read the data in config_name_for_path string list, which is removed
> by Heiko's series, if I am reading it right.
>
> In the new world order with Heiko's series, the way you grab
> submodule configuration data is to call submodule_from_path() or
> submodule_from_name() and they allow you to read from .gitmodules
> either in a commit (for historical state), the index, or from the
> working tree.  Which should be much cleaner and goes in the right
> direction in the longer term.
>
> And the best part of the story is that your module_name would be
> just calling submodule_from_path() and peeking into a field.
>
>> 2) Come up with a good thread pool abstraction
>>    (Started as "[RFC/PATCH 0/4] parallel fetch for submodules" )
>>    This abstraction (if done right) will allow us to use it in different places
>>    easily. I started it as part of "git fetch --recurse-submodules" because
>>    it is submodule related and reasonably sized
>
> I personally think this gives the most bang-for-buck.  Write that
> and expose it as "git submodule for-each-parallel", which takes the
> shell scriptlet that currently is the loop body of "while read mode
> sha1 stage sm_path" in update and clone.  You will have immediate
> and large payback.

You said that before. I feel like this is a bit to narrow. A "git submodule
for-each-parallel" would be a very specific tool which we would use to
make the different submodule operations parallel with ease. But it would
be very submodule specifc I guess?

That's why I want to be a bit more generic and have this thread pool API
done in C, such that "any for loop" in git can be easily replaced by using
the thread pool. I think of "git fetch --all" specially.

>
> And you do not even need module_list and module_name written in C in
> order to do so.  Not that these two are not trivial to implement, but
> the payoff from them is not as large as from for-each-parallel ;-)
>

I think I can do this for-each-parallel once I have the more generic thread
pooling done. All that is left now is a good handling of stdout/stderr, which
I am not yet convinced how to do it right. Maybe each task accumulates
messages in two string buffers and then the thread pool will output the
string buffer one a task is done.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 21:21                 ` Stefan Beller
@ 2015-08-07 21:32                   ` Junio C Hamano
  2015-08-07 22:04                     ` Stefan Beller
  2015-08-07 22:42                   ` Junio C Hamano
  1 sibling, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 21:32 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Stefan Beller <sbeller@google.com> writes:

> On Fri, Aug 7, 2015 at 2:14 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Stefan Beller <sbeller@google.com> writes:
>>> ...
>>> We can drop that hunk as it only uses the new method
>>> `submodule_name_for_path` but doesn't change functionality.
>>> So if you want to keep Heikos work, I'll just resend the patch
>>> without that hunk.
>>
>> Does such a result even make sense?  Note that I wasn't talking
>> about textual conflict.
>>
>> If we followed what you just said, that patch will try to directly
>> read the data in config_name_for_path string list, which is removed
>> by Heiko's series, if I am reading it right.

By the way, the above is more important part of the message you are
responding to.  The result does not simply link, because your
unsorted_string_list_lookup() will no longer have the string list in
the first place X-<.

>>> 2) Come up with a good thread pool abstraction
>>>    (Started as "[RFC/PATCH 0/4] parallel fetch for submodules" )
>>>    This abstraction (if done right) will allow us to use it in different places
>>>    easily. I started it as part of "git fetch --recurse-submodules" because
>>>    it is submodule related and reasonably sized
>>
>> I personally think this gives the most bang-for-buck.  Write that
>> and expose it as "git submodule for-each-parallel", which takes the
>> shell scriptlet that currently is the loop body of "while read mode
>> sha1 stage sm_path" in update and clone.  You will have immediate
>> and large payback.
>
> You said that before. I feel like this is a bit to narrow.

That depends on how good a design you make for the internal
"parallel execution" engine.  If it is made to take an arbitrary C
function with list of arguments to call it with, for-each-parallel
would be just a degenerate and narrow case where that arbitrary C
function happens to be exec'ing a shell and feed a shell scriptlet
to it.

The internal parallel execution engine would be reusable without any
change to call C native functions, allowing you to do everything
inside the process in the future.  And the "narrow" case is a good
first step to validate your design actually _works_.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 21:32                   ` Junio C Hamano
@ 2015-08-07 22:04                     ` Stefan Beller
  2015-08-07 22:18                       ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 22:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 2:32 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> On Fri, Aug 7, 2015 at 2:14 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>> Stefan Beller <sbeller@google.com> writes:
>>>> ...
>>>> We can drop that hunk as it only uses the new method
>>>> `submodule_name_for_path` but doesn't change functionality.
>>>> So if you want to keep Heikos work, I'll just resend the patch
>>>> without that hunk.
>>>
>>> Does such a result even make sense?  Note that I wasn't talking
>>> about textual conflict.
>>>
>>> If we followed what you just said, that patch will try to directly
>>> read the data in config_name_for_path string list, which is removed
>>> by Heiko's series, if I am reading it right.
>
> By the way, the above is more important part of the message you are
> responding to.  The result does not simply link, because your
> unsorted_string_list_lookup() will no longer have the string list in
> the first place X-<.

I looked through Heikos series and think it is an improvement. I mean I
can redo my patches on top of his. Specially this patch will be easy,
as patch 2/4 (extract functions for submodule config set and lookup)
implements get_name_for_path. All I would need to do then is expose it
to the shell via the helper.

>
>>>> 2) Come up with a good thread pool abstraction
>>>>    (Started as "[RFC/PATCH 0/4] parallel fetch for submodules" )
>>>>    This abstraction (if done right) will allow us to use it in different places
>>>>    easily. I started it as part of "git fetch --recurse-submodules" because
>>>>    it is submodule related and reasonably sized
>>>
>>> I personally think this gives the most bang-for-buck.  Write that
>>> and expose it as "git submodule for-each-parallel", which takes the
>>> shell scriptlet that currently is the loop body of "while read mode
>>> sha1 stage sm_path" in update and clone.  You will have immediate
>>> and large payback.
>>
>> You said that before. I feel like this is a bit to narrow.
>
> That depends on how good a design you make for the internal
> "parallel execution" engine.  If it is made to take an arbitrary C
> function with list of arguments to call it with, for-each-parallel
> would be just a degenerate and narrow case where that arbitrary C
> function happens to be exec'ing a shell and feed a shell scriptlet
> to it.
>
> The internal parallel execution engine would be reusable without any
> change to call C native functions, allowing you to do everything
> inside the process in the future.  And the "narrow" case is a good
> first step to validate your design actually _works_.
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 22:04                     ` Stefan Beller
@ 2015-08-07 22:18                       ` Junio C Hamano
  2015-08-07 23:12                         ` Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 22:18 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Stefan Beller <sbeller@google.com> writes:

> On Fri, Aug 7, 2015 at 2:32 PM, Junio C Hamano <gitster@pobox.com> wrote:
>
>>>> If we followed what you just said, that patch will try to directly
>>>> read the data in config_name_for_path string list, which is removed
>>>> by Heiko's series, if I am reading it right.
>>
>> By the way, the above is more important part of the message you are
>> responding to.  The result does not simply link, because your
>> unsorted_string_list_lookup() will no longer have the string list in
>> the first place X-<.
>
> I looked through Heikos series and think it is an improvement. I mean I
> can redo my patches on top of his. Specially this patch will be easy,
> as patch 2/4 (extract functions for submodule config set and lookup)
> implements get_name_for_path. All I would need to do then is expose it
> to the shell via the helper.

Yes, that is exactly what I said a few messages ago, wasn't it?

But that would require that you read and understand Heiko's work and
that you understand what its future direction should be, communicate
that vision to others to share, before building on top of it.  And
with that effort, you would already be in a good position to polish
Heiko's stalled work and move it forward.  After all, you cannot
just build on a stalled work, declare "my part is done; the result
is not mergeable because the foundation is not cooked, but that is
not my problem" ;-).

IIRC, the issues around the topic were nothing show-stopping, but
the primary reason it stalled was that Heiko got busy with his other
obligations, so I'd appreciate others like you to help the topic
move forward.

Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 21:21                 ` Stefan Beller
  2015-08-07 21:32                   ` Junio C Hamano
@ 2015-08-07 22:42                   ` Junio C Hamano
  2015-08-07 23:19                     ` Stefan Beller
  1 sibling, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-07 22:42 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Stefan Beller <sbeller@google.com> writes:

> That's why I want to be a bit more generic and have this thread pool API
> done in C, such that "any for loop" in git can be easily replaced by using
> the thread pool. I think of "git fetch --all" specially.

One more thing, as I didn't notice that you kept repeating "thread"
pool API.

While I doubt that you would gain much by using threads in place of
processes to perform parallel "submodule update", "submodule clone",
"fetch all", etc., all of which are fairly well isolated and heavy
weight operations themselves, and I suspect that the implementation
simplicity of using separate processes would probably be huge plus
compared to any possible upside you may gain from using threads, if
you really want to go the "thread" route, the first thing to try
would be to see if a few places we already use threads for
parallelism (namely, "grep", "pack-objects", "preload-index" and
"index-pack") can be factored out and model your new API around the
commonality among them.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 22:18                       ` Junio C Hamano
@ 2015-08-07 23:12                         ` Stefan Beller
  0 siblings, 0 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 23:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 3:18 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> On Fri, Aug 7, 2015 at 2:32 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>
>>>>> If we followed what you just said, that patch will try to directly
>>>>> read the data in config_name_for_path string list, which is removed
>>>>> by Heiko's series, if I am reading it right.
>>>
>>> By the way, the above is more important part of the message you are
>>> responding to.  The result does not simply link, because your
>>> unsorted_string_list_lookup() will no longer have the string list in
>>> the first place X-<.
>>
>> I looked through Heikos series and think it is an improvement. I mean I
>> can redo my patches on top of his. Specially this patch will be easy,
>> as patch 2/4 (extract functions for submodule config set and lookup)
>> implements get_name_for_path. All I would need to do then is expose it
>> to the shell via the helper.
>
> Yes, that is exactly what I said a few messages ago, wasn't it?

Yes, you did. I needed to repeat it to actually mentally process it.

>
> But that would require that you read and understand Heiko's work and
> that you understand what its future direction should be, communicate
> that vision to others to share, before building on top of it.  And
> with that effort, you would already be in a good position to polish
> Heiko's stalled work and move it forward.  After all, you cannot
> just build on a stalled work, declare "my part is done; the result
> is not mergeable because the foundation is not cooked, but that is
> not my problem" ;-).

That would be an easy world to live in, but I agree that I should get the
fundamentals right.

>
> IIRC, the issues around the topic were nothing show-stopping, but
> the primary reason it stalled was that Heiko got busy with his other
> obligations, so I'd appreciate others like you to help the topic
> move forward.

Ok, I'll dig up the old reviews and review it myself before continuing
on my patches.

>
> Thanks.
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 22:42                   ` Junio C Hamano
@ 2015-08-07 23:19                     ` Stefan Beller
  2015-08-08  0:21                       ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Stefan Beller @ 2015-08-07 23:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 3:42 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> That's why I want to be a bit more generic and have this thread pool API
>> done in C, such that "any for loop" in git can be easily replaced by using
>> the thread pool. I think of "git fetch --all" specially.
>
> One more thing, as I didn't notice that you kept repeating "thread"
> pool API.

Yeah I intended to use both threads and processes for the heavy submodule
operations.

Each thread in the thread pool would setup the right argument lists
and environment
and then spawn a process for heavy weight operations, wait for the process,
maybe process its output.

Maybe I should omit the whole thread pool and only use select from a single
threaded main program.

>
> While I doubt that you would gain much by using threads in place of
> processes to perform parallel "submodule update", "submodule clone",
> "fetch all", etc., all of which are fairly well isolated and heavy
> weight operations themselves, and I suspect that the implementation
> simplicity of using separate processes would probably be huge plus
> compared to any possible upside you may gain from using threads, if
> you really want to go the "thread" route, the first thing to try
> would be to see if a few places we already use threads for
> parallelism (namely, "grep", "pack-objects", "preload-index" and
> "index-pack") can be factored out and model your new API around the
> commonality among them.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-07 23:19                     ` Stefan Beller
@ 2015-08-08  0:21                       ` Junio C Hamano
  2015-08-08  6:20                         ` Junio C Hamano
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-08  0:21 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Stefan Beller <sbeller@google.com> writes:

>> One more thing, as I didn't notice that you kept repeating "thread"
>> pool API.
>
> Yeah I intended to use both threads and processes for the heavy submodule
> operations.

OK.  I somehow had an impression that it might be more tricky than
it is worth to spawn/run_command out of a thread/run_async, but if
it makes it easier and more generic to correctly arrange the
thread-pool API to allow the per-thread functions to run_command(),
I wouldn't object to that approach at all.

Then for-each-parallel would truly become a trivial application of
that API.  Your per-thread function happens to be a one that
prepares appropriate "struct child_process" and calls run_command()
to interact with that single child, receiving its output and culling
it when it is done.

>> ... if
>> you really want to go the "thread" route, the first thing to try
>> would be to see if a few places we already use threads for
>> parallelism (namely, "grep", "pack-objects", "preload-index" and
>> "index-pack") can be factored out and model your new API around the
>> commonality among them.

And obviously, doing your pool API around threads will allow you to
throw future per-thread function that do not involve run_command()
at all at your API, and it will make it easy to adapt the current
threaded parts of the system to the API.

Perfect.

;-)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-08  0:21                       ` Junio C Hamano
@ 2015-08-08  6:20                         ` Junio C Hamano
  2015-08-10 17:03                           ` Stefan Beller
  0 siblings, 1 reply; 27+ messages in thread
From: Junio C Hamano @ 2015-08-08  6:20 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jens Lehmann, Heiko Voigt, git

Junio C Hamano <gitster@pobox.com> writes:

>>> ... if
>>> you really want to go the "thread" route, the first thing to try
>>> would be to see if a few places we already use threads for
>>> parallelism (namely, "grep", "pack-objects", "preload-index" and
>>> "index-pack") can be factored out and model your new API around the
>>> commonality among them.
>
> And obviously, doing your pool API around threads will allow you to
> throw future per-thread function that do not involve run_command()
> at all at your API, and it will make it easy to adapt the current
> threaded parts of the system to the API.

Just a few random thoughts before going to bed and going offline for
the weekend...

Eventually, we would want to do "submodule update" of a top-level
project that has 500 submodules underneath, but obviously we would
not want to blindly spawn 500 threads, each of which runs "fetch",
all at the same time.  We'd want to limit the parallelism to a sane
limit (say, 16 or 32), stuff 500 work units to a queue, from which
that many number of worker bees grab work unit one by one to process
and then come back to ask for more work.

And we would eventually want to be able to do this even when these
500 submodules are spread across multiple levels of nested
submodules (e.g. top-level may have 8 submodules, and they have 16
nested subsubmodules each on average, each of which may have 4
nested subsubsubmodules on average).  Specifying -j16 at the top
level and apportioning the parallelism to recursive invoation of
"submodule update" in such a way that the overall process is
efficient and without waste would be a bit tricky.

In such a nested submodule case, we may want to instead try to
enumerate these 500 submodules upfront with unbounded parallelism
(e.g. the top-level will ask 4 worker bees to process immediate 8
submodules, and they each spawn 4 worker bees to process their
immediate 16 submodules, and so on---it is unbounded because we do
not know upfront how deep the nesting is).

Let's call that a recursive module_list.  You would want out of a
recursive module_list:

 - the path to the submodule (or "." for the top-level) to indicate
   where in the nested hierarchy the information came from;

 - the information the flat module_list gives for that location.

Since you already have module_list() function natively callable from
C and also it is available via "git submodule--helper module_list",
implementing a recursive module_list would be a good first proof of
concept exercise for your "thread pool" engine.  You can employ the
"dual implementation" trick to call

 - a version that tells the thread to run the native C version of
   module_list(),

 - another version that tells the thread to run_command()
   "submodule--helper module_list" in the top-level and nested
   submodules.

and collect and compare their results and performance.

That will not just be a good proof of concept for the pool
implementation.

Once you have such a recursive module_list, you can use it as a way
to easily obtain such a "unified view" list of all submodules.  That
can be used to stuff a flat work unit queue to implement reasonably
bounded parallelism.

Your recursive "submoule update" implementation could be

 - Run recursive module_list to stuff the work queue with these 500
   submodules (possibly spread across in top-level and in nested
   submodules, or all 500 in the flat top-level);

 - Start N worker bees, and tell them to pick from that work queue,
   each element of which tells them to process which submodule that
   resides in where (either in the top-level project or in a
   submodule).

And each work element would essentially be to run "git fetch" in
that submodule directory.

Hmm...

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] submodule: implement `module_name` as a builtin helper
  2015-08-08  6:20                         ` Junio C Hamano
@ 2015-08-10 17:03                           ` Stefan Beller
  0 siblings, 0 replies; 27+ messages in thread
From: Stefan Beller @ 2015-08-10 17:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, Heiko Voigt, git

On Fri, Aug 7, 2015 at 11:20 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
>>>> ... if
>>>> you really want to go the "thread" route, the first thing to try
>>>> would be to see if a few places we already use threads for
>>>> parallelism (namely, "grep", "pack-objects", "preload-index" and
>>>> "index-pack") can be factored out and model your new API around the
>>>> commonality among them.
>>
>> And obviously, doing your pool API around threads will allow you to
>> throw future per-thread function that do not involve run_command()
>> at all at your API, and it will make it easy to adapt the current
>> threaded parts of the system to the API.
>
> Just a few random thoughts before going to bed and going offline for
> the weekend...
>
> Eventually, we would want to do "submodule update" of a top-level
> project that has 500 submodules underneath, but obviously we would
> not want to blindly spawn 500 threads, each of which runs "fetch",
> all at the same time.  We'd want to limit the parallelism to a sane
> limit (say, 16 or 32), stuff 500 work units to a queue, from which
> that many number of worker bees grab work unit one by one to process
> and then come back to ask for more work.

This is exactly what is currently implemented and works in patch
"[RFC PATCH 2/4] Add a workdispatcher to get work done in parallel"

>
> And we would eventually want to be able to do this even when these
> 500 submodules are spread across multiple levels of nested
> submodules (e.g. top-level may have 8 submodules, and they have 16
> nested subsubmodules each on average, each of which may have 4
> nested subsubsubmodules on average).  Specifying -j16 at the top
> level and apportioning the parallelism to recursive invoation of
> "submodule update" in such a way that the overall process is
> efficient and without waste would be a bit tricky.
>
> In such a nested submodule case, we may want to instead try to
> enumerate these 500 submodules upfront with unbounded parallelism

The problem here is we need to have finished cloning at least one top level
submodule before we can add any further sub-submodules into the work
queue. So if there are only 4 top level submodules we'd have a slow start.

If we have only one top level work queue, the deeper nesting levels
will not explode unbound, but eventually we will reach the 16 threads
and keep working with them, and once git submodule is ported to C
we don't need to have process invocations, but can rely on the just the
threading done right. And then we it should be rather easy to only use
one top level task queue.

> (e.g. the top-level will ask 4 worker bees to process immediate 8
> submodules, and they each spawn 4 worker bees to process their
> immediate 16 submodules, and so on---it is unbounded because we do
> not know upfront how deep the nesting is).
>
> Let's call that a recursive module_list.  You would want out of a
> recursive module_list:
>
>  - the path to the submodule (or "." for the top-level) to indicate
>    where in the nested hierarchy the information came from;
>
>  - the information the flat module_list gives for that location.

I only see value in this recursive module_list approach for updating
the work tree (fetch --recurse-submodules=yes, instead of cloning)
as you already have most of the submodules there (there may be new
submodules after fetching).

Also collecting 500 submodule information is in the order of reading
500 files. But then we need to do 500 fetches. And doing a fetch takes
some orders of magnitude longer than reading a file, so I am not convinced
a parallel collection of work to be done is a good first step?

>
> Since you already have module_list() function natively callable from
> C and also it is available via "git submodule--helper module_list",
> implementing a recursive module_list would be a good first proof of
> concept exercise for your "thread pool" engine.  You can employ the
> "dual implementation" trick to call
>
>  - a version that tells the thread to run the native C version of
>    module_list(),
>
>  - another version that tells the thread to run_command()
>    "submodule--helper module_list" in the top-level and nested
>    submodules.
>
> and collect and compare their results and performance.
>
> That will not just be a good proof of concept for the pool
> implementation.
>
> Once you have such a recursive module_list, you can use it as a way
> to easily obtain such a "unified view" list of all submodules.  That
> can be used to stuff a flat work unit queue to implement reasonably
> bounded parallelism.
>
> Your recursive "submoule update" implementation could be
>
>  - Run recursive module_list to stuff the work queue with these 500
>    submodules (possibly spread across in top-level and in nested
>    submodules, or all 500 in the flat top-level);
>
>  - Start N worker bees, and tell them to pick from that work queue,
>    each element of which tells them to process which submodule that
>    resides in where (either in the top-level project or in a
>    submodule).
>
> And each work element would essentially be to run "git fetch" in
> that submodule directory.

As said before the 500 fetches are many orders of magnitude slower than
collecting the information what to do.

>
> Hmm...
>
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2015-08-10 17:03 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-05  0:04 [PATCH 1/4] submodule: implement `module_list` as a builtin helper Stefan Beller
2015-08-05  0:04 ` [PATCH 2/4] submodule: implement `module_name` " Stefan Beller
2015-08-05  0:05   ` Stefan Beller
2015-08-05  0:58   ` Eric Sunshine
2015-08-05 16:29     ` Stefan Beller
2015-08-05 19:06   ` Jens Lehmann
2015-08-05 19:55     ` Stefan Beller
2015-08-05 21:08       ` [PATCH] " Stefan Beller
2015-08-06 19:49         ` Jens Lehmann
2015-08-06 21:38           ` Stefan Beller
2015-08-07 20:03             ` Junio C Hamano
2015-08-07 20:54               ` Stefan Beller
2015-08-07 20:17           ` Junio C Hamano
2015-08-07 20:49             ` Stefan Beller
2015-08-07 21:14               ` Junio C Hamano
2015-08-07 21:21                 ` Stefan Beller
2015-08-07 21:32                   ` Junio C Hamano
2015-08-07 22:04                     ` Stefan Beller
2015-08-07 22:18                       ` Junio C Hamano
2015-08-07 23:12                         ` Stefan Beller
2015-08-07 22:42                   ` Junio C Hamano
2015-08-07 23:19                     ` Stefan Beller
2015-08-08  0:21                       ` Junio C Hamano
2015-08-08  6:20                         ` Junio C Hamano
2015-08-10 17:03                           ` Stefan Beller
2015-08-05 18:31 ` [PATCH 1/4] submodule: implement `module_list` " Jens Lehmann
2015-08-07 19:53 ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).