All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv9 0/6]  Expose submodule parallelism to the user
@ 2016-02-09 20:54 Stefan Beller
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
                   ` (6 more replies)
  0 siblings, 7 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

This replaces sb/submodule-parallel-update.
(which is based on sb/submodule-parallel-fetch)

Thanks to Junio and Jonathan for review!

* s/config_parallel_submodules/parallel_submodules/ as it is not in config any
  more. Also ease up the default setting. 

* use an enum for submodule update strategy

* parsing of submodule.fetchJobs in submodule.c instead of submodule-config.
  -> no need for 2 refactoring patches
  
* This seems to clash with 00/20] refs backend.
> Applied this on top of a merge between the current 'master' and
> 'sb/submodule-parallel-update' topic to untangle the dependency;
> otherwise there is no way for this topic to make progress X-<.

Anything I can do to help with easing the clash?

Thanks,
Stefan

interdiff to v8:

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 7e01b85..6c74474 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -257,7 +257,7 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 
 static int git_submodule_config(const char *var, const char *value, void *cb)
 {
-	return parse_submodule_config_option(var, value);
+	return submodule_config(var, value, cb);
 }
 
 struct submodule_update_clone {
@@ -316,7 +316,6 @@ static int update_clone_get_next_task(struct child_process *cp,
 		struct strbuf displaypath_sb = STRBUF_INIT;
 		struct strbuf sb = STRBUF_INIT;
 		const char *displaypath = NULL;
-		const char *update_module = NULL;
 		char *url = NULL;
 		int needs_cloning = 0;
 
@@ -340,13 +339,8 @@ static int update_clone_get_next_task(struct child_process *cp,
 		else
 			displaypath = ce->name;
 
-		if (pp->update)
-			update_module = pp->update;
-		if (!update_module)
-			update_module = sub->update;
-		if (!update_module)
-			update_module = "checkout";
-		if (!strcmp(update_module, "none")) {
+		if ((pp->update && !strcmp(pp->update, "none")) ||
+		    (!pp->update && sub->update == SM_UPDATE_NONE)) {
 			strbuf_addf(err, "Skipping submodule '%s'\n",
 				    displaypath);
 			goto cleanup_and_continue;
@@ -479,9 +473,7 @@ static int update_clone(int argc, const char **argv, const char *prefix)
 	git_config(git_submodule_config, NULL);
 
 	if (max_jobs < 0)
-		max_jobs = config_parallel_submodules();
-	if (max_jobs < 0)
-		max_jobs = 1;
+		max_jobs = parallel_submodules();
 
 	run_processes_parallel(max_jobs,
 			       update_clone_get_next_task,
diff --git a/submodule-config.c b/submodule-config.c
index 339b59d..d6c8d9c 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -32,7 +32,6 @@ enum lookup_type {
 
 static struct submodule_cache cache;
 static int is_cache_init;
-static unsigned long parallel_jobs = -1;
 
 static int config_path_cmp(const struct submodule_entry *a,
 			   const struct submodule_entry *b,
@@ -162,10 +161,27 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache,
 	return NULL;
 }
 
+static int name_and_item_from_var(const char *var, struct strbuf *name,
+				  struct strbuf *item)
+{
+	const char *subsection, *key;
+	int subsection_len, parse;
+	parse = parse_config_key(var, "submodule", &subsection,
+			&subsection_len, &key);
+	if (parse < 0 || !subsection)
+		return 0;
+
+	strbuf_add(name, subsection, subsection_len);
+	strbuf_addstr(item, key);
+
+	return 1;
+}
+
 static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 		const unsigned char *gitmodules_sha1, const char *name)
 {
 	struct submodule *submodule;
+	struct strbuf name_buf = STRBUF_INIT;
 
 	submodule = cache_lookup_name(cache, gitmodules_sha1, name);
 	if (submodule)
@@ -173,10 +189,13 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 
 	submodule = xmalloc(sizeof(*submodule));
 
-	submodule->name = xstrdup(name);
+	strbuf_addstr(&name_buf, name);
+	submodule->name = strbuf_detach(&name_buf, NULL);
+
 	submodule->path = NULL;
 	submodule->url = NULL;
-	submodule->update = NULL;
+	submodule->update = SM_UPDATE_UNSPECIFIED;
+	submodule->update_command = NULL;
 	submodule->fetch_recurse = RECURSE_SUBMODULES_NONE;
 	submodule->ignore = NULL;
 
@@ -229,34 +248,22 @@ struct parse_config_parameter {
 	int overwrite;
 };
 
-static int parse_generic_submodule_config(const char *key,
-					  const char *var,
-					  const char *value,
-					  struct parse_config_parameter *me)
+static int parse_config(const char *var, const char *value, void *data)
 {
-	if (!strcmp(key, "fetchjobs")) {
-		if (!git_parse_ulong(value, &parallel_jobs)) {
-			warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
-			parallel_jobs = 1;
-		}
-	}
+	struct parse_config_parameter *me = data;
+	struct submodule *submodule;
+	struct strbuf name = STRBUF_INIT, item = STRBUF_INIT;
+	int ret = 0;
 
-	return 0;
-}
+	/* this also ensures that we only parse submodule entries */
+	if (!name_and_item_from_var(var, &name, &item))
+		return 0;
 
-static int parse_specific_submodule_config(const char *subsection,
-					   const char *key,
-					   const char *var,
-					   const char *value,
-					   struct parse_config_parameter *me)
-{
-	int ret = 0;
-	struct submodule *submodule;
 	submodule = lookup_or_create_by_name(me->cache,
 					     me->gitmodules_sha1,
-					     subsection);
+					     name.buf);
 
-	if (!strcmp(key, "path")) {
+	if (!strcmp(item.buf, "path")) {
 		if (!value)
 			ret = config_error_nonbool(var);
 		else if (!me->overwrite && submodule->path)
@@ -269,7 +276,7 @@ static int parse_specific_submodule_config(const char *subsection,
 			submodule->path = xstrdup(value);
 			cache_put_path(me->cache, submodule);
 		}
-	} else if (!strcmp(key, "fetchrecursesubmodules")) {
+	} else if (!strcmp(item.buf, "fetchrecursesubmodules")) {
 		/* when parsing worktree configurations we can die early */
 		int die_on_error = is_null_sha1(me->gitmodules_sha1);
 		if (!me->overwrite &&
@@ -280,7 +287,7 @@ static int parse_specific_submodule_config(const char *subsection,
 			submodule->fetch_recurse = parse_fetch_recurse(
 								var, value,
 								die_on_error);
-	} else if (!strcmp(key, "ignore")) {
+	} else if (!strcmp(item.buf, "ignore")) {
 		if (!value)
 			ret = config_error_nonbool(var);
 		else if (!me->overwrite && submodule->ignore)
@@ -296,7 +303,7 @@ static int parse_specific_submodule_config(const char *subsection,
 			free((void *) submodule->ignore);
 			submodule->ignore = xstrdup(value);
 		}
-	} else if (!strcmp(key, "url")) {
+	} else if (!strcmp(item.buf, "url")) {
 		if (!value) {
 			ret = config_error_nonbool(var);
 		} else if (!me->overwrite && submodule->url) {
@@ -306,39 +313,31 @@ static int parse_specific_submodule_config(const char *subsection,
 			free((void *) submodule->url);
 			submodule->url = xstrdup(value);
 		}
-	} else if (!strcmp(key, "update")) {
+	} else if (!strcmp(item.buf, "update")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->update)
+		else if (!me->overwrite &&
+		    submodule->update != SM_UPDATE_UNSPECIFIED)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					     "update");
 		else {
-			free((void *) submodule->update);
-			submodule->update = xstrdup(value);
+			submodule->update_command = NULL;
+			if (!strcmp(value, "none"))
+				submodule->update = SM_UPDATE_NONE;
+			else if (!strcmp(value, "checkout"))
+				submodule->update = SM_UPDATE_CHECKOUT;
+			else if (!strcmp(value, "rebase"))
+				submodule->update = SM_UPDATE_REBASE;
+			else if (!strcmp(value, "merge"))
+				submodule->update = SM_UPDATE_MERGE;
+			else if (!skip_prefix(value, "!", &submodule->update_command))
+				die(_("invalid value for %s"), var);
 		}
 	}
 
-	return ret;
-}
-
-static int parse_config(const char *var, const char *value, void *data)
-{
-	struct parse_config_parameter *me = data;
-	int subsection_len, ret;
-	const char *subsection, *key;
-	char *sub;
-
-	if (parse_config_key(var, "submodule", &subsection,
-			     &subsection_len, &key) < 0)
-		return 0;
-
-	if (!subsection)
-		return parse_generic_submodule_config(key, var, value, me);
+	strbuf_release(&name);
+	strbuf_release(&item);
 
-	sub = xmemdupz(subsection, subsection_len);
-	ret = parse_specific_submodule_config(sub, key,
-					      var, value, me);
-	free(sub);
 	return ret;
 }
 
@@ -487,8 +486,3 @@ void submodule_free(void)
 	cache_free(&cache);
 	is_cache_init = 0;
 }
-
-unsigned long config_parallel_submodules(void)
-{
-	return parallel_jobs;
-}
diff --git a/submodule-config.h b/submodule-config.h
index bab1e8d..e3bd56e 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -4,6 +4,15 @@
 #include "hashmap.h"
 #include "strbuf.h"
 
+enum submodule_update_type {
+	SM_UPDATE_CHECKOUT,
+	SM_UPDATE_REBASE,
+	SM_UPDATE_MERGE,
+	SM_UPDATE_NONE,
+	SM_UPDATE_COMMAND,
+	SM_UPDATE_UNSPECIFIED
+};
+
 /*
  * Submodule entry containing the information about a certain submodule
  * in a certain revision.
@@ -14,7 +23,8 @@ struct submodule {
 	const char *url;
 	int fetch_recurse;
 	const char *ignore;
-	const char *update;
+	enum submodule_update_type update;
+	const char *update_command;
 	/* the sha1 blob id of the responsible .gitmodules file */
 	unsigned char gitmodules_sha1[20];
 };
@@ -27,6 +37,4 @@ const struct submodule *submodule_from_path(const unsigned char *commit_sha1,
 		const char *path);
 void submodule_free(void);
 
-unsigned long config_parallel_submodules(void);
-
 #endif /* SUBMODULE_CONFIG_H */
diff --git a/submodule.c b/submodule.c
index eb7d54b..fd763f5 100644
--- a/submodule.c
+++ b/submodule.c
@@ -15,6 +15,7 @@
 #include "thread-utils.h"
 
 static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
+static int parallel_jobs = 1;
 static struct string_list changed_submodule_paths;
 static int initialized_fetch_ref_tips;
 static struct sha1_array ref_tips_before_fetch;
@@ -169,7 +170,13 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
 
 int submodule_config(const char *var, const char *value, void *cb)
 {
-	if (starts_with(var, "submodule."))
+	if (!strcmp(var, "submodule.fetchjobs")) {
+		unsigned long val;
+		if (!git_parse_ulong(value, &val) || 0 > val || val >= INT_MAX)
+			die(_("Error parsing submodule.fetchJobs %s"), value);
+		parallel_jobs = val;
+		return 0;
+	} else if (starts_with(var, "submodule."))
 		return parse_submodule_config_option(var, value);
 	else if (!strcmp(var, "fetch.recursesubmodules")) {
 		config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value);
@@ -752,9 +759,7 @@ int fetch_populated_submodules(const struct argv_array *options,
 	/* default value, "--submodule-prefix" and its value are added later */
 
 	if (max_parallel_jobs < 0)
-		max_parallel_jobs = config_parallel_submodules();
-	if (max_parallel_jobs < 0)
-		max_parallel_jobs = 1;
+		max_parallel_jobs = parallel_jobs;
 
 	calculate_changed_submodule_paths();
 	run_processes_parallel(max_parallel_jobs,
@@ -1102,3 +1107,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir)
 	strbuf_release(&rel_path);
 	free((void *)real_work_tree);
 }
+
+int parallel_submodules(void)
+{
+	return parallel_jobs;
+}
diff --git a/submodule.h b/submodule.h
index cbc0003..95babc5 100644
--- a/submodule.h
+++ b/submodule.h
@@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
 		struct string_list *needs_pushing);
 int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
 void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
+int parallel_submodules(void);
 
 #endif


Stefan Beller (6):
  submodule-config: keep update strategy around
  submodule-config: drop check against NULL
  fetching submodules: respect `submodule.fetchJobs` config option
  git submodule update: have a dedicated helper for cloning
  submodule update: expose parallelism to the user
  clone: allow an explicit argument for parallel submodule clones

 Documentation/config.txt        |   6 +
 Documentation/git-clone.txt     |   6 +-
 Documentation/git-submodule.txt |   7 +-
 builtin/clone.c                 |  19 +++-
 builtin/fetch.c                 |   2 +-
 builtin/submodule--helper.c     | 238 ++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh                |  54 ++++-----
 submodule-config.c              |  28 ++++-
 submodule-config.h              |  11 ++
 submodule.c                     |  17 ++-
 submodule.h                     |   1 +
 t/t5526-fetch-submodules.sh     |  14 +++
 t/t7400-submodule-basic.sh      |   4 +-
 t/t7406-submodule-update.sh     |  27 +++++
 14 files changed, 385 insertions(+), 49 deletions(-)

-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 21:08   ` Junio C Hamano
                     ` (2 more replies)
  2016-02-09 20:54 ` [PATCHv9 2/6] submodule-config: drop check against NULL Stefan Beller
                   ` (5 subsequent siblings)
  6 siblings, 3 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

Currently submodule.<name>.update is only handled by git-submodule.sh.
C code will start to need to make use of that value as more of the
functionality of git-submodule.sh moves into library code in C.

Add the update field to 'struct submodule' and populate it so it can
be read as sm->update or from sm->update_command.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 22 ++++++++++++++++++++++
 submodule-config.h | 11 +++++++++++
 2 files changed, 33 insertions(+)

diff --git a/submodule-config.c b/submodule-config.c
index afe0ea8..a1af5de 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -194,6 +194,8 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 
 	submodule->path = NULL;
 	submodule->url = NULL;
+	submodule->update = SM_UPDATE_UNSPECIFIED;
+	submodule->update_command = NULL;
 	submodule->fetch_recurse = RECURSE_SUBMODULES_NONE;
 	submodule->ignore = NULL;
 
@@ -311,6 +313,26 @@ static int parse_config(const char *var, const char *value, void *data)
 			free((void *) submodule->url);
 			submodule->url = xstrdup(value);
 		}
+	} else if (!strcmp(item.buf, "update")) {
+		if (!value)
+			ret = config_error_nonbool(var);
+		else if (!me->overwrite &&
+		    submodule->update != SM_UPDATE_UNSPECIFIED)
+			warn_multiple_config(me->commit_sha1, submodule->name,
+					     "update");
+		else {
+			submodule->update_command = NULL;
+			if (!strcmp(value, "none"))
+				submodule->update = SM_UPDATE_NONE;
+			else if (!strcmp(value, "checkout"))
+				submodule->update = SM_UPDATE_CHECKOUT;
+			else if (!strcmp(value, "rebase"))
+				submodule->update = SM_UPDATE_REBASE;
+			else if (!strcmp(value, "merge"))
+				submodule->update = SM_UPDATE_MERGE;
+			else if (!skip_prefix(value, "!", &submodule->update_command))
+				die(_("invalid value for %s"), var);
+		}
 	}
 
 	strbuf_release(&name);
diff --git a/submodule-config.h b/submodule-config.h
index 9061e4e..e3bd56e 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -4,6 +4,15 @@
 #include "hashmap.h"
 #include "strbuf.h"
 
+enum submodule_update_type {
+	SM_UPDATE_CHECKOUT,
+	SM_UPDATE_REBASE,
+	SM_UPDATE_MERGE,
+	SM_UPDATE_NONE,
+	SM_UPDATE_COMMAND,
+	SM_UPDATE_UNSPECIFIED
+};
+
 /*
  * Submodule entry containing the information about a certain submodule
  * in a certain revision.
@@ -14,6 +23,8 @@ struct submodule {
 	const char *url;
 	int fetch_recurse;
 	const char *ignore;
+	enum submodule_update_type update;
+	const char *update_command;
 	/* the sha1 blob id of the responsible .gitmodules file */
 	unsigned char gitmodules_sha1[20];
 };
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 2/6] submodule-config: drop check against NULL
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 21:50   ` Jonathan Nieder
  2016-02-09 20:54 ` [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

Adhere to the common coding style of Git and not check explicitly
for NULL throughout the file. There are still other occurrences in the
code base but that is usually inside of conditions with side effects.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index a1af5de..d6c8d9c 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -266,7 +266,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	if (!strcmp(item.buf, "path")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->path != NULL)
+		else if (!me->overwrite && submodule->path)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"path");
 		else {
@@ -290,7 +290,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	} else if (!strcmp(item.buf, "ignore")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->ignore != NULL)
+		else if (!me->overwrite && submodule->ignore)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"ignore");
 		else if (strcmp(value, "untracked") &&
@@ -306,7 +306,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	} else if (!strcmp(item.buf, "url")) {
 		if (!value) {
 			ret = config_error_nonbool(var);
-		} else if (!me->overwrite && submodule->url != NULL) {
+		} else if (!me->overwrite && submodule->url) {
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"url");
 		} else {
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
  2016-02-09 20:54 ` [PATCHv9 2/6] submodule-config: drop check against NULL Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 21:12   ` Junio C Hamano
  2016-02-09 22:34   ` Jonathan Nieder
  2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

This allows to configure fetching and updating in parallel
without having the command line option.

This moved the responsibility to determine how many parallel processes
to start from builtin/fetch to submodule.c as we need a way to communicate
"The user did not specify the number of parallel processes in the command
line options" in the builtin fetch. The submodule code takes care of
the precedence (CLI > config > default).

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/config.txt    |  6 ++++++
 builtin/fetch.c             |  2 +-
 submodule.c                 | 17 ++++++++++++++++-
 submodule.h                 |  1 +
 t/t5526-fetch-submodules.sh | 14 ++++++++++++++
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2d06b11..3b02732 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2646,6 +2646,12 @@ submodule.<name>.ignore::
 	"--ignore-submodules" option. The 'git submodule' commands are not
 	affected by this setting.
 
+submodule.fetchJobs::
+	Specifies how many submodules are fetched/cloned at the same time.
+	A positive integer allows up to that number of submodules fetched
+	in parallel. A value of 0 will give some reasonable default.
+	If unset, it defaults to 1.
+
 tag.sort::
 	This variable controls the sort ordering of tags when displayed by
 	linkgit:git-tag[1]. Without the "--sort=<value>" option provided, the
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 586840d..5aa1c2d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
 static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
 static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
 static int tags = TAGS_DEFAULT, unshallow, update_shallow;
-static int max_children = 1;
+static int max_children = -1;
 static const char *depth;
 static const char *upload_pack;
 static struct strbuf default_rla = STRBUF_INIT;
diff --git a/submodule.c b/submodule.c
index b83939c..fd763f5 100644
--- a/submodule.c
+++ b/submodule.c
@@ -15,6 +15,7 @@
 #include "thread-utils.h"
 
 static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
+static int parallel_jobs = 1;
 static struct string_list changed_submodule_paths;
 static int initialized_fetch_ref_tips;
 static struct sha1_array ref_tips_before_fetch;
@@ -169,7 +170,13 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
 
 int submodule_config(const char *var, const char *value, void *cb)
 {
-	if (starts_with(var, "submodule."))
+	if (!strcmp(var, "submodule.fetchjobs")) {
+		unsigned long val;
+		if (!git_parse_ulong(value, &val) || 0 > val || val >= INT_MAX)
+			die(_("Error parsing submodule.fetchJobs %s"), value);
+		parallel_jobs = val;
+		return 0;
+	} else if (starts_with(var, "submodule."))
 		return parse_submodule_config_option(var, value);
 	else if (!strcmp(var, "fetch.recursesubmodules")) {
 		config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value);
@@ -751,6 +758,9 @@ int fetch_populated_submodules(const struct argv_array *options,
 	argv_array_push(&spf.args, "--recurse-submodules-default");
 	/* default value, "--submodule-prefix" and its value are added later */
 
+	if (max_parallel_jobs < 0)
+		max_parallel_jobs = parallel_jobs;
+
 	calculate_changed_submodule_paths();
 	run_processes_parallel(max_parallel_jobs,
 			       get_next_submodule,
@@ -1097,3 +1107,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir)
 	strbuf_release(&rel_path);
 	free((void *)real_work_tree);
 }
+
+int parallel_submodules(void)
+{
+	return parallel_jobs;
+}
diff --git a/submodule.h b/submodule.h
index cbc0003..95babc5 100644
--- a/submodule.h
+++ b/submodule.h
@@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
 		struct string_list *needs_pushing);
 int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
 void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
+int parallel_submodules(void);
 
 #endif
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 1241146..954d0e4 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea
 	test_i18ncmp expect.err actual.err
 '
 
+test_expect_success 'fetching submodules respects parallel settings' '
+	git config fetch.recurseSubmodules true &&
+	(
+		cd downstream &&
+		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 7 &&
+		grep "7 tasks" trace.out &&
+		git config submodule.fetchJobs 8 &&
+		GIT_TRACE=$(pwd)/trace.out git fetch &&
+		grep "8 tasks" trace.out &&
+		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 9 &&
+		grep "9 tasks" trace.out
+	)
+'
+
 test_done
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
                   ` (2 preceding siblings ...)
  2016-02-09 20:54 ` [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 21:24   ` Junio C Hamano
                     ` (2 more replies)
  2016-02-09 20:54 ` [PATCHv9 5/6] submodule update: expose parallelism to the user Stefan Beller
                   ` (2 subsequent siblings)
  6 siblings, 3 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

This introduces a new helper function in git submodule--helper
which takes care of cloning all submodules, which we want to
parallelize eventually.

Some tests (such as empty URL, update_mode=none) are required in the
helper to make the decision for cloning. These checks have been
moved into the C function as well (no need to repeat them in the
shell script).

As we can only access the stderr channel from within the parallel
processing engine, we need to reroute the error message for
specified but initialized submodules to stderr. As it is an error
message, this should have gone to stderr in the first place, so it
is a bug fix along the way.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/submodule--helper.c | 230 ++++++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh            |  45 +++------
 t/t7400-submodule-basic.sh  |   4 +-
 3 files changed, 243 insertions(+), 36 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f4c3eff..53356dc 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -255,6 +255,235 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+static int git_submodule_config(const char *var, const char *value, void *cb)
+{
+	return submodule_config(var, value, cb);
+}
+
+struct submodule_update_clone {
+	/* states */
+	int count;
+	int print_unmatched;
+	/* configuration */
+	int quiet;
+	const char *reference;
+	const char *depth;
+	const char *update;
+	const char *recursive_prefix;
+	const char *prefix;
+	struct module_list list;
+	struct string_list projectlines;
+	struct pathspec pathspec;
+};
+#define SUBMODULE_UPDATE_CLONE_INIT {0, 0, 0, NULL, NULL, NULL, NULL, NULL, MODULE_LIST_INIT, STRING_LIST_INIT_DUP}
+
+static void fill_clone_command(struct child_process *cp, int quiet,
+			       const char *prefix, const char *path,
+			       const char *name, const char *url,
+			       const char *reference, const char *depth)
+{
+	cp->git_cmd = 1;
+	cp->no_stdin = 1;
+	cp->stdout_to_stderr = 1;
+	cp->err = -1;
+	argv_array_push(&cp->args, "submodule--helper");
+	argv_array_push(&cp->args, "clone");
+	if (quiet)
+		argv_array_push(&cp->args, "--quiet");
+
+	if (prefix)
+		argv_array_pushl(&cp->args, "--prefix", prefix, NULL);
+
+	argv_array_pushl(&cp->args, "--path", path, NULL);
+	argv_array_pushl(&cp->args, "--name", name, NULL);
+	argv_array_pushl(&cp->args, "--url", url, NULL);
+	if (reference)
+		argv_array_push(&cp->args, reference);
+	if (depth)
+		argv_array_push(&cp->args, depth);
+}
+
+static int update_clone_get_next_task(struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void **pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	for (; pp->count < pp->list.nr; pp->count++) {
+		const struct submodule *sub = NULL;
+		const struct cache_entry *ce = pp->list.entries[pp->count];
+		struct strbuf displaypath_sb = STRBUF_INIT;
+		struct strbuf sb = STRBUF_INIT;
+		const char *displaypath = NULL;
+		char *url = NULL;
+		int needs_cloning = 0;
+
+		if (ce_stage(ce)) {
+			if (pp->recursive_prefix)
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s/%s\n",
+					pp->recursive_prefix, ce->name);
+			else
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s\n",
+					ce->name);
+			goto cleanup_and_continue;
+		}
+
+		sub = submodule_from_path(null_sha1, ce->name);
+
+		if (pp->recursive_prefix)
+			displaypath = relative_path(pp->recursive_prefix,
+						    ce->name, &displaypath_sb);
+		else
+			displaypath = ce->name;
+
+		if ((pp->update && !strcmp(pp->update, "none")) ||
+		    (!pp->update && sub->update == SM_UPDATE_NONE)) {
+			strbuf_addf(err, "Skipping submodule '%s'\n",
+				    displaypath);
+			goto cleanup_and_continue;
+		}
+
+		/*
+		 * Looking up the url in .git/config.
+		 * We must not fall back to .gitmodules as we only want
+		 * to process configured submodules.
+		 */
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "submodule.%s.url", sub->name);
+		git_config_get_string(sb.buf, &url);
+		if (!url) {
+			/*
+			 * Only mention uninitialized submodules when its
+			 * path have been specified
+			 */
+			if (pp->pathspec.nr)
+				strbuf_addf(err,
+					    _("Submodule path '%s' not initialized\n"
+					    "Maybe you want to use 'update --init'?"),
+					    displaypath);
+			goto cleanup_and_continue;
+		}
+
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "%s/.git", ce->name);
+		needs_cloning = !file_exists(sb.buf);
+
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "%06o %s %d %d\t%s\n", ce->ce_mode,
+				sha1_to_hex(ce->sha1), ce_stage(ce),
+				needs_cloning, ce->name);
+		string_list_append(&pp->projectlines, sb.buf);
+
+		if (needs_cloning) {
+			fill_clone_command(cp, pp->quiet, pp->prefix,
+					   ce->name, sub->name, strdup(url),
+					   pp->reference, pp->depth);
+			pp->count++;
+		}
+
+cleanup_and_continue:
+		free(url);
+		strbuf_reset(&displaypath_sb);
+		strbuf_reset(&sb);
+
+		if (needs_cloning)
+			return 1;
+	}
+	return 0;
+}
+
+static int update_clone_start_failure(struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void *pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	strbuf_addf(err, "error when starting a child process");
+	pp->print_unmatched = 1;
+
+	return 1;
+}
+
+static int update_clone_task_finished(int result,
+				      struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void *pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	if (!result) {
+		return 0;
+	} else {
+		strbuf_addf(err, "error in one child process");
+		pp->print_unmatched = 1;
+		return 1;
+	}
+}
+
+static int update_clone(int argc, const char **argv, const char *prefix)
+{
+	struct string_list_item *item;
+	struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
+
+	struct option module_list_options[] = {
+		OPT_STRING(0, "prefix", &prefix,
+			   N_("path"),
+			   N_("path into the working tree")),
+		OPT_STRING(0, "recursive_prefix", &pp.recursive_prefix,
+			   N_("path"),
+			   N_("path into the working tree, across nested "
+			      "submodule boundaries")),
+		OPT_STRING(0, "update", &pp.update,
+			   N_("string"),
+			   N_("update command for submodules")),
+		OPT_STRING(0, "reference", &pp.reference, "<repository>",
+			   N_("Use the local reference repository "
+			      "instead of a full clone")),
+		OPT_STRING(0, "depth", &pp.depth, "<depth>",
+			   N_("Create a shallow clone truncated to the "
+			      "specified number of revisions")),
+		OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
+		OPT_END()
+	};
+
+	const char *const git_submodule_helper_usage[] = {
+		N_("git submodule--helper list [--prefix=<path>] [<path>...]"),
+		NULL
+	};
+	pp.prefix = prefix;
+
+	argc = parse_options(argc, argv, prefix, module_list_options,
+			     git_submodule_helper_usage, 0);
+
+	if (module_list_compute(argc, argv, prefix, &pp.pathspec, &pp.list) < 0) {
+		printf("#unmatched\n");
+		return 1;
+	}
+
+	gitmodules_config();
+	/* Overlay the parsed .gitmodules file with .git/config */
+	git_config(git_submodule_config, NULL);
+	run_processes_parallel(1, update_clone_get_next_task,
+				  update_clone_start_failure,
+				  update_clone_task_finished,
+				  &pp);
+
+	if (pp.print_unmatched) {
+		printf("#unmatched\n");
+		return 1;
+	}
+
+	for_each_string_list_item(item, &pp.projectlines)
+		utf8_fprintf(stdout, "%s", item->string);
+
+	return 0;
+}
+
 struct cmd_struct {
 	const char *cmd;
 	int (*fn)(int, const char **, const char *);
@@ -264,6 +493,7 @@ static struct cmd_struct commands[] = {
 	{"list", module_list},
 	{"name", module_name},
 	{"clone", module_clone},
+	{"update-clone", update_clone}
 };
 
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
diff --git a/git-submodule.sh b/git-submodule.sh
index 9bc5c5f..9f554fb 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -664,17 +664,18 @@ cmd_update()
 		cmd_init "--" "$@" || return
 	fi
 
-	cloned_modules=
-	git submodule--helper list --prefix "$wt_prefix" "$@" | {
+	git submodule--helper update-clone ${GIT_QUIET:+--quiet} \
+		${wt_prefix:+--prefix "$wt_prefix"} \
+		${prefix:+--recursive_prefix "$prefix"} \
+		${update:+--update "$update"} \
+		${reference:+--reference "$reference"} \
+		${depth:+--depth "$depth"} \
+		"$@" | {
 	err=
-	while read mode sha1 stage sm_path
+	while read mode sha1 stage just_cloned sm_path
 	do
 		die_if_unmatched "$mode"
-		if test "$stage" = U
-		then
-			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
-			continue
-		fi
+
 		name=$(git submodule--helper name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		branch=$(get_submodule_config "$name" branch master)
@@ -691,27 +692,10 @@ cmd_update()
 
 		displaypath=$(relative_path "$prefix$sm_path")
 
-		if test "$update_module" = "none"
-		then
-			echo "Skipping submodule '$displaypath'"
-			continue
-		fi
-
-		if test -z "$url"
-		then
-			# Only mention uninitialized submodules when its
-			# path have been specified
-			test "$#" != "0" &&
-			say "$(eval_gettext "Submodule path '\$displaypath' not initialized
-Maybe you want to use 'update --init'?")"
-			continue
-		fi
-
-		if ! test -d "$sm_path"/.git && ! test -f "$sm_path"/.git
+		if test $just_cloned -eq 1
 		then
-			git submodule--helper clone ${GIT_QUIET:+--quiet} --prefix "$prefix" --path "$sm_path" --name "$name" --url "$url" "$reference" "$depth" || exit
-			cloned_modules="$cloned_modules;$name"
 			subsha1=
+			update_module=checkout
 		else
 			subsha1=$(clear_local_git_env; cd "$sm_path" &&
 				git rev-parse --verify HEAD) ||
@@ -751,13 +735,6 @@ Maybe you want to use 'update --init'?")"
 				die "$(eval_gettext "Unable to fetch in submodule path '\$displaypath'")"
 			fi
 
-			# Is this something we just cloned?
-			case ";$cloned_modules;" in
-			*";$name;"*)
-				# then there is no local change to integrate
-				update_module=checkout ;;
-			esac
-
 			must_die_on_failure=
 			case "$update_module" in
 			checkout)
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 540771c..5991e3c 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -462,7 +462,7 @@ test_expect_success 'update --init' '
 	git config --remove-section submodule.example &&
 	test_must_fail git config submodule.example.url &&
 
-	git submodule update init > update.out &&
+	git submodule update init 2> update.out &&
 	cat update.out &&
 	test_i18ngrep "not initialized" update.out &&
 	test_must_fail git rev-parse --resolve-git-dir init/.git &&
@@ -480,7 +480,7 @@ test_expect_success 'update --init from subdirectory' '
 	mkdir -p sub &&
 	(
 		cd sub &&
-		git submodule update ../init >update.out &&
+		git submodule update ../init 2>update.out &&
 		cat update.out &&
 		test_i18ngrep "not initialized" update.out &&
 		test_must_fail git rev-parse --resolve-git-dir ../init/.git &&
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 5/6] submodule update: expose parallelism to the user
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
                   ` (3 preceding siblings ...)
  2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 20:54 ` [PATCHv9 6/6] clone: allow an explicit argument for parallel submodule clones Stefan Beller
  2016-02-09 21:39 ` [PATCHv9 0/6] Expose submodule parallelism to the user Junio C Hamano
  6 siblings, 0 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

Expose possible parallelism either via the "--jobs" CLI parameter or
the "submodule.fetchJobs" setting.

By having the variable initialized to -1, we make sure 0 can be passed
into the parallel processing machine, which will then pick as many parallel
workers as there are CPUs.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/git-submodule.txt |  7 ++++++-
 builtin/submodule--helper.c     | 16 ++++++++++++----
 git-submodule.sh                |  9 +++++++++
 t/t7406-submodule-update.sh     | 12 ++++++++++++
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-submodule.txt b/Documentation/git-submodule.txt
index 1572f05..13adebf 100644
--- a/Documentation/git-submodule.txt
+++ b/Documentation/git-submodule.txt
@@ -16,7 +16,7 @@ SYNOPSIS
 'git submodule' [--quiet] deinit [-f|--force] [--] <path>...
 'git submodule' [--quiet] update [--init] [--remote] [-N|--no-fetch]
 	      [-f|--force] [--rebase|--merge] [--reference <repository>]
-	      [--depth <depth>] [--recursive] [--] [<path>...]
+	      [--depth <depth>] [--recursive] [--jobs <n>] [--] [<path>...]
 'git submodule' [--quiet] summary [--cached|--files] [(-n|--summary-limit) <n>]
 	      [commit] [--] [<path>...]
 'git submodule' [--quiet] foreach [--recursive] <command>
@@ -377,6 +377,11 @@ for linkgit:git-clone[1]'s `--reference` and `--shared` options carefully.
 	clone with a history truncated to the specified number of revisions.
 	See linkgit:git-clone[1]
 
+-j <n>::
+--jobs <n>::
+	This option is only valid for the update command.
+	Clone new submodules in parallel with as many jobs.
+	Defaults to the `submodule.fetchJobs` option.
 
 <path>...::
 	Paths to submodule(s). When specified this will restrict the command
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 53356dc..6c74474 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -427,6 +427,7 @@ static int update_clone_task_finished(int result,
 
 static int update_clone(int argc, const char **argv, const char *prefix)
 {
+	int max_jobs = -1;
 	struct string_list_item *item;
 	struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
 
@@ -447,6 +448,8 @@ static int update_clone(int argc, const char **argv, const char *prefix)
 		OPT_STRING(0, "depth", &pp.depth, "<depth>",
 			   N_("Create a shallow clone truncated to the "
 			      "specified number of revisions")),
+		OPT_INTEGER('j', "jobs", &max_jobs,
+			    N_("parallel jobs")),
 		OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
 		OPT_END()
 	};
@@ -468,10 +471,15 @@ static int update_clone(int argc, const char **argv, const char *prefix)
 	gitmodules_config();
 	/* Overlay the parsed .gitmodules file with .git/config */
 	git_config(git_submodule_config, NULL);
-	run_processes_parallel(1, update_clone_get_next_task,
-				  update_clone_start_failure,
-				  update_clone_task_finished,
-				  &pp);
+
+	if (max_jobs < 0)
+		max_jobs = parallel_submodules();
+
+	run_processes_parallel(max_jobs,
+			       update_clone_get_next_task,
+			       update_clone_start_failure,
+			       update_clone_task_finished,
+			       &pp);
 
 	if (pp.print_unmatched) {
 		printf("#unmatched\n");
diff --git a/git-submodule.sh b/git-submodule.sh
index 9f554fb..10c5af9 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -645,6 +645,14 @@ cmd_update()
 		--depth=*)
 			depth=$1
 			;;
+		-j|--jobs)
+			case "$2" in '') usage ;; esac
+			jobs="--jobs=$2"
+			shift
+			;;
+		--jobs=*)
+			jobs=$1
+			;;
 		--)
 			shift
 			break
@@ -670,6 +678,7 @@ cmd_update()
 		${update:+--update "$update"} \
 		${reference:+--reference "$reference"} \
 		${depth:+--depth "$depth"} \
+		${jobs:+$jobs} \
 		"$@" | {
 	err=
 	while read mode sha1 stage just_cloned sm_path
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index dda3929..7fd5142 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -774,4 +774,16 @@ test_expect_success 'submodule update --recursive drops module name before recur
 	 test_i18ngrep "Submodule path .deeper/submodule/subsubmodule.: checked out" actual
 	)
 '
+
+test_expect_success 'submodule update can be run in parallel' '
+	(cd super2 &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 7 &&
+	 grep "7 tasks" trace.out &&
+	 git config submodule.fetchJobs 8 &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update &&
+	 grep "8 tasks" trace.out &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 9 &&
+	 grep "9 tasks" trace.out
+	)
+'
 test_done
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCHv9 6/6] clone: allow an explicit argument for parallel submodule clones
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
                   ` (4 preceding siblings ...)
  2016-02-09 20:54 ` [PATCHv9 5/6] submodule update: expose parallelism to the user Stefan Beller
@ 2016-02-09 20:54 ` Stefan Beller
  2016-02-09 21:39 ` [PATCHv9 0/6] Expose submodule parallelism to the user Junio C Hamano
  6 siblings, 0 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 20:54 UTC (permalink / raw)
  To: gitster, jrnieder, git, Jens.Lehmann; +Cc: Stefan Beller

Just pass it along to "git submodule update", which may pick reasonable
defaults if you don't specify an explicit number.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/git-clone.txt |  6 +++++-
 builtin/clone.c             | 19 +++++++++++++------
 t/t7406-submodule-update.sh | 15 +++++++++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt
index 6bf000d..6db7b6d 100644
--- a/Documentation/git-clone.txt
+++ b/Documentation/git-clone.txt
@@ -14,7 +14,7 @@ SYNOPSIS
 	  [-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
 	  [--dissociate] [--separate-git-dir <git dir>]
 	  [--depth <depth>] [--[no-]single-branch]
-	  [--recursive | --recurse-submodules] [--] <repository>
+	  [--recursive | --recurse-submodules] [--jobs <n>] [--] <repository>
 	  [<directory>]
 
 DESCRIPTION
@@ -221,6 +221,10 @@ objects from the source repository into a pack in the cloned repository.
 	The result is Git repository can be separated from working
 	tree.
 
+-j <n>::
+--jobs <n>::
+	The number of submodules fetched at the same time.
+	Defaults to the `submodule.fetchJobs` option.
 
 <repository>::
 	The (possibly remote) repository to clone from.  See the
diff --git a/builtin/clone.c b/builtin/clone.c
index a0b3cd9..b004fb4 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -50,6 +50,7 @@ static int option_progress = -1;
 static struct string_list option_config;
 static struct string_list option_reference;
 static int option_dissociate;
+static int max_jobs = -1;
 
 static struct option builtin_clone_options[] = {
 	OPT__VERBOSITY(&option_verbosity),
@@ -72,6 +73,8 @@ static struct option builtin_clone_options[] = {
 		    N_("initialize submodules in the clone")),
 	OPT_BOOL(0, "recurse-submodules", &option_recursive,
 		    N_("initialize submodules in the clone")),
+	OPT_INTEGER('j', "jobs", &max_jobs,
+		    N_("number of submodules cloned in parallel")),
 	OPT_STRING(0, "template", &option_template, N_("template-directory"),
 		   N_("directory from which templates will be used")),
 	OPT_STRING_LIST(0, "reference", &option_reference, N_("repo"),
@@ -95,10 +98,6 @@ static struct option builtin_clone_options[] = {
 	OPT_END()
 };
 
-static const char *argv_submodule[] = {
-	"submodule", "update", "--init", "--recursive", NULL
-};
-
 static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
 {
 	static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
@@ -724,8 +723,16 @@ static int checkout(void)
 	err |= run_hook_le(NULL, "post-checkout", sha1_to_hex(null_sha1),
 			   sha1_to_hex(sha1), "1", NULL);
 
-	if (!err && option_recursive)
-		err = run_command_v_opt(argv_submodule, RUN_GIT_CMD);
+	if (!err && option_recursive) {
+		struct argv_array args = ARGV_ARRAY_INIT;
+		argv_array_pushl(&args, "submodule", "update", "--init", "--recursive", NULL);
+
+		if (max_jobs != -1)
+			argv_array_pushf(&args, "--jobs=%d", max_jobs);
+
+		err = run_command_v_opt(args.argv, RUN_GIT_CMD);
+		argv_array_clear(&args);
+	}
 
 	return err;
 }
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index 7fd5142..090891e 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -786,4 +786,19 @@ test_expect_success 'submodule update can be run in parallel' '
 	 grep "9 tasks" trace.out
 	)
 '
+
+test_expect_success 'git clone passes the parallel jobs config on to submodules' '
+	test_when_finished "rm -rf super4" &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 7 . super4 &&
+	grep "7 tasks" trace.out &&
+	rm -rf super4 &&
+	git config --global submodule.fetchJobs 8 &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules . super4 &&
+	grep "8 tasks" trace.out &&
+	rm -rf super4 &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 9 . super4 &&
+	grep "9 tasks" trace.out &&
+	rm -rf super4
+'
+
 test_done
-- 
2.7.0.rc0.35.gb0f7869.dirty

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
@ 2016-02-09 21:08   ` Junio C Hamano
  2016-02-09 22:19     ` Stefan Beller
  2016-02-09 21:49   ` Jonathan Nieder
  2016-02-09 22:32   ` Junio C Hamano
  2 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 21:08 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> Currently submodule.<name>.update is only handled by git-submodule.sh.
> C code will start to need to make use of that value as more of the
> functionality of git-submodule.sh moves into library code in C.
>
> Add the update field to 'struct submodule' and populate it so it can
> be read as sm->update or from sm->update_command.
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  submodule-config.c | 22 ++++++++++++++++++++++
>  submodule-config.h | 11 +++++++++++
>  2 files changed, 33 insertions(+)
>
> diff --git a/submodule-config.c b/submodule-config.c
> index afe0ea8..a1af5de 100644
> --- a/submodule-config.c
> +++ b/submodule-config.c
> @@ -194,6 +194,8 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
>  
>  	submodule->path = NULL;
>  	submodule->url = NULL;
> +	submodule->update = SM_UPDATE_UNSPECIFIED;
> +	submodule->update_command = NULL;
>  	submodule->fetch_recurse = RECURSE_SUBMODULES_NONE;
>  	submodule->ignore = NULL;
>  
> @@ -311,6 +313,26 @@ static int parse_config(const char *var, const char *value, void *data)
>  			free((void *) submodule->url);
>  			submodule->url = xstrdup(value);
>  		}
> +	} else if (!strcmp(item.buf, "update")) {
> +		if (!value)
> +			ret = config_error_nonbool(var);
> +		else if (!me->overwrite &&
> +		    submodule->update != SM_UPDATE_UNSPECIFIED)

Funny indentation here (locally fixable).

Thanks to this change, we do not need "'!= NULL' removal" for this
location in 2/6.

> +			warn_multiple_config(me->commit_sha1, submodule->name,
> +					     "update");
> +		else {
> +			submodule->update_command = NULL;
> +			if (!strcmp(value, "none"))
> +				submodule->update = SM_UPDATE_NONE;
> +			else if (!strcmp(value, "checkout"))
> +				submodule->update = SM_UPDATE_CHECKOUT;
> +			else if (!strcmp(value, "rebase"))
> +				submodule->update = SM_UPDATE_REBASE;
> +			else if (!strcmp(value, "merge"))
> +				submodule->update = SM_UPDATE_MERGE;
> +			else if (!skip_prefix(value, "!", &submodule->update_command))
> +				die(_("invalid value for %s"), var);
> +		}
>  	}
>  
>  	strbuf_release(&name);
> diff --git a/submodule-config.h b/submodule-config.h
> index 9061e4e..e3bd56e 100644
> --- a/submodule-config.h
> +++ b/submodule-config.h
> @@ -4,6 +4,15 @@
>  #include "hashmap.h"
>  #include "strbuf.h"
>  
> +enum submodule_update_type {
> +	SM_UPDATE_CHECKOUT,
> +	SM_UPDATE_REBASE,
> +	SM_UPDATE_MERGE,
> +	SM_UPDATE_NONE,
> +	SM_UPDATE_COMMAND,
> +	SM_UPDATE_UNSPECIFIED
> +};
> +
>  /*
>   * Submodule entry containing the information about a certain submodule
>   * in a certain revision.
> @@ -14,6 +23,8 @@ struct submodule {
>  	const char *url;
>  	int fetch_recurse;
>  	const char *ignore;
> +	enum submodule_update_type update;
> +	const char *update_command;
>  	/* the sha1 blob id of the responsible .gitmodules file */
>  	unsigned char gitmodules_sha1[20];
>  };

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-09 20:54 ` [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
@ 2016-02-09 21:12   ` Junio C Hamano
  2016-02-09 22:34   ` Jonathan Nieder
  1 sibling, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 21:12 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> This allows to configure fetching and updating in parallel
> without having the command line option.
>
> This moved the responsibility to determine how many parallel processes
> to start from builtin/fetch to submodule.c as we need a way to communicate
> "The user did not specify the number of parallel processes in the command
> line options" in the builtin fetch. The submodule code takes care of
> the precedence (CLI > config > default).
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---

This got a lot simpler to read, I think, even though this comes with
a better error checking ;-)

Thanks.

>  Documentation/config.txt    |  6 ++++++
>  builtin/fetch.c             |  2 +-
>  submodule.c                 | 17 ++++++++++++++++-
>  submodule.h                 |  1 +
>  t/t5526-fetch-submodules.sh | 14 ++++++++++++++
>  5 files changed, 38 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/config.txt b/Documentation/config.txt
> index 2d06b11..3b02732 100644
> --- a/Documentation/config.txt
> +++ b/Documentation/config.txt
> @@ -2646,6 +2646,12 @@ submodule.<name>.ignore::
>  	"--ignore-submodules" option. The 'git submodule' commands are not
>  	affected by this setting.
>  
> +submodule.fetchJobs::
> +	Specifies how many submodules are fetched/cloned at the same time.
> +	A positive integer allows up to that number of submodules fetched
> +	in parallel. A value of 0 will give some reasonable default.
> +	If unset, it defaults to 1.
> +
>  tag.sort::
>  	This variable controls the sort ordering of tags when displayed by
>  	linkgit:git-tag[1]. Without the "--sort=<value>" option provided, the
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 586840d..5aa1c2d 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
>  static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
>  static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
>  static int tags = TAGS_DEFAULT, unshallow, update_shallow;
> -static int max_children = 1;
> +static int max_children = -1;
>  static const char *depth;
>  static const char *upload_pack;
>  static struct strbuf default_rla = STRBUF_INIT;
> diff --git a/submodule.c b/submodule.c
> index b83939c..fd763f5 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -15,6 +15,7 @@
>  #include "thread-utils.h"
>  
>  static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
> +static int parallel_jobs = 1;
>  static struct string_list changed_submodule_paths;
>  static int initialized_fetch_ref_tips;
>  static struct sha1_array ref_tips_before_fetch;
> @@ -169,7 +170,13 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
>  
>  int submodule_config(const char *var, const char *value, void *cb)
>  {
> -	if (starts_with(var, "submodule."))
> +	if (!strcmp(var, "submodule.fetchjobs")) {
> +		unsigned long val;
> +		if (!git_parse_ulong(value, &val) || 0 > val || val >= INT_MAX)
> +			die(_("Error parsing submodule.fetchJobs %s"), value);
> +		parallel_jobs = val;
> +		return 0;
> +	} else if (starts_with(var, "submodule."))
>  		return parse_submodule_config_option(var, value);
>  	else if (!strcmp(var, "fetch.recursesubmodules")) {
>  		config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value);
> @@ -751,6 +758,9 @@ int fetch_populated_submodules(const struct argv_array *options,
>  	argv_array_push(&spf.args, "--recurse-submodules-default");
>  	/* default value, "--submodule-prefix" and its value are added later */
>  
> +	if (max_parallel_jobs < 0)
> +		max_parallel_jobs = parallel_jobs;
> +
>  	calculate_changed_submodule_paths();
>  	run_processes_parallel(max_parallel_jobs,
>  			       get_next_submodule,
> @@ -1097,3 +1107,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir)
>  	strbuf_release(&rel_path);
>  	free((void *)real_work_tree);
>  }
> +
> +int parallel_submodules(void)
> +{
> +	return parallel_jobs;
> +}
> diff --git a/submodule.h b/submodule.h
> index cbc0003..95babc5 100644
> --- a/submodule.h
> +++ b/submodule.h
> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>  		struct string_list *needs_pushing);
>  int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>  void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
> +int parallel_submodules(void);
>  
>  #endif
> diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
> index 1241146..954d0e4 100755
> --- a/t/t5526-fetch-submodules.sh
> +++ b/t/t5526-fetch-submodules.sh
> @@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea
>  	test_i18ncmp expect.err actual.err
>  '
>  
> +test_expect_success 'fetching submodules respects parallel settings' '
> +	git config fetch.recurseSubmodules true &&
> +	(
> +		cd downstream &&
> +		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 7 &&
> +		grep "7 tasks" trace.out &&
> +		git config submodule.fetchJobs 8 &&
> +		GIT_TRACE=$(pwd)/trace.out git fetch &&
> +		grep "8 tasks" trace.out &&
> +		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 9 &&
> +		grep "9 tasks" trace.out
> +	)
> +'
> +
>  test_done

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
@ 2016-02-09 21:24   ` Junio C Hamano
  2016-02-10  0:37   ` Jonathan Nieder
  2016-02-10  2:26   ` Jacob Keller
  2 siblings, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 21:24 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> +	for (; pp->count < pp->list.nr; pp->count++) {
> +		const struct submodule *sub = NULL;
> +		const struct cache_entry *ce = pp->list.entries[pp->count];
> +		struct strbuf displaypath_sb = STRBUF_INIT;
> +		struct strbuf sb = STRBUF_INIT;
> +		const char *displaypath = NULL;
> +		char *url = NULL;
> +		int needs_cloning = 0;
> +
> +		if (ce_stage(ce)) {
> +			if (pp->recursive_prefix)
> +				strbuf_addf(err,
> +					"Skipping unmerged submodule %s/%s\n",
> +					pp->recursive_prefix, ce->name);

The funny indentation of the string is a workaround for overly deep
nesting, but is the overly deep nesting telling us that perhaps one
iteration of this loop can be an invocation of a helper function, I
wonder?

> +			else
> +				strbuf_addf(err,
> +					"Skipping unmerged submodule %s\n",
> +					ce->name);
> +			goto cleanup_and_continue;
> +		}

> +
> +		sub = submodule_from_path(null_sha1, ce->name);
> +
> +		if (pp->recursive_prefix)
> +			displaypath = relative_path(pp->recursive_prefix,
> +						    ce->name, &displaypath_sb);
> +		else
> +			displaypath = ce->name;
> +
> +		if ((pp->update && !strcmp(pp->update, "none")) ||
> +		    (!pp->update && sub->update == SM_UPDATE_NONE)) {

This looks a bit strange.  I wonder pp->update should also become
enum for the same reason why sub->update has become enum.  That way,
we need to be worried about parsing these tokens in one place where
a textual string "none" is translated to SM_UPDATE_NONE.  If we
started allowing "None" in the sub->update parse_config() in
submodule-config.c, we would want that new parsing rule propagated
to pp->update, right?

> +			strbuf_addf(err, "Skipping submodule '%s'\n",
> +				    displaypath);
> +			goto cleanup_and_continue;
> +		}

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6]  Expose submodule parallelism to the user
  2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
                   ` (5 preceding siblings ...)
  2016-02-09 20:54 ` [PATCHv9 6/6] clone: allow an explicit argument for parallel submodule clones Stefan Beller
@ 2016-02-09 21:39 ` Junio C Hamano
  2016-02-09 21:46   ` Stefan Beller
  6 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 21:39 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> This replaces sb/submodule-parallel-update.
> (which is based on sb/submodule-parallel-fetch)
>
> Thanks to Junio and Jonathan for review!
>
> * s/config_parallel_submodules/parallel_submodules/ as it is not in config any
>   more. Also ease up the default setting. 
>
> * use an enum for submodule update strategy

Your earlier sb/submodule-init will need to be rerolled on top, as
it depends on ->update being a string, I think.

> * This seems to clash with 00/20] refs backend.
>> Applied this on top of a merge between the current 'master' and
>> 'sb/submodule-parallel-update' topic to untangle the dependency;
>> otherwise there is no way for this topic to make progress X-<.
>
> Anything I can do to help with easing the clash?

Perhaps try to rebase the series on top of such a merge (with this
updated series) yourself and propose it as a basis for the next
reroll for David?  In short, working together with topic(s) that
touch the same area?

Thanks.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6] Expose submodule parallelism to the user
  2016-02-09 21:39 ` [PATCHv9 0/6] Expose submodule parallelism to the user Junio C Hamano
@ 2016-02-09 21:46   ` Stefan Beller
  2016-02-09 22:03     ` Junio C Hamano
  0 siblings, 1 reply; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 21:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Nieder, git, Jens Lehmann

On Tue, Feb 9, 2016 at 1:39 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> This replaces sb/submodule-parallel-update.
>> (which is based on sb/submodule-parallel-fetch)
>>
>> Thanks to Junio and Jonathan for review!
>>
>> * s/config_parallel_submodules/parallel_submodules/ as it is not in config any
>>   more. Also ease up the default setting.
>>
>> * use an enum for submodule update strategy
>
> Your earlier sb/submodule-init will need to be rerolled on top, as
> it depends on ->update being a string, I think.

Sure, I just want to prioritize this series as it is the bottleneck for all
other following things (init as well as the groups thing), I think.

>
>> * This seems to clash with 00/20] refs backend.
>>> Applied this on top of a merge between the current 'master' and
>>> 'sb/submodule-parallel-update' topic to untangle the dependency;
>>> otherwise there is no way for this topic to make progress X-<.
>>
>> Anything I can do to help with easing the clash?
>
> Perhaps try to rebase the series on top of such a merge (with this
> updated series) yourself and propose it as a basis for the next
> reroll for David?  In short, working together with topic(s) that
> touch the same area?

Ok, I'll see if I can find a better commit to base this series on.

>
> Thanks.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
  2016-02-09 21:08   ` Junio C Hamano
@ 2016-02-09 21:49   ` Jonathan Nieder
  2016-02-09 22:32   ` Junio C Hamano
  2 siblings, 0 replies; 30+ messages in thread
From: Jonathan Nieder @ 2016-02-09 21:49 UTC (permalink / raw)
  To: Stefan Beller; +Cc: gitster, git, Jens.Lehmann

Hi,

Stefan Beller wrote:

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  submodule-config.c | 22 ++++++++++++++++++++++
>  submodule-config.h | 11 +++++++++++
>  2 files changed, 33 insertions(+)

Yay, this is much clearer.  Thanks for indulging.

[...]
> +++ b/submodule-config.c
[...]
> @@ -311,6 +313,26 @@ static int parse_config(const char *var, const char *value, void *data)
> +			else if (!skip_prefix(value, "!", &submodule->update_command))
> +				die(_("invalid value for %s"), var);

Should this say
			else if (skip_prefix(value, "!", &submodule->update_command))
				submodule->update = SM_UPDATE_COMMAND;
			else
				die(...);

?

[...]
> --- a/submodule-config.h
> +++ b/submodule-config.h
> @@ -4,6 +4,15 @@
>  #include "hashmap.h"
>  #include "strbuf.h"
>  
> +enum submodule_update_type {
> +	SM_UPDATE_CHECKOUT,
> +	SM_UPDATE_REBASE,
> +	SM_UPDATE_MERGE,
> +	SM_UPDATE_NONE,
> +	SM_UPDATE_COMMAND,
> +	SM_UPDATE_UNSPECIFIED
> +};

If _UNSPECIFIED is equal to 0, that would futureproof this better
against zero-initialized submodule structs without ->update explicitly
set.

After those two changes and the whitespace change Junio suggested,
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 2/6] submodule-config: drop check against NULL
  2016-02-09 20:54 ` [PATCHv9 2/6] submodule-config: drop check against NULL Stefan Beller
@ 2016-02-09 21:50   ` Jonathan Nieder
  0 siblings, 0 replies; 30+ messages in thread
From: Jonathan Nieder @ 2016-02-09 21:50 UTC (permalink / raw)
  To: Stefan Beller; +Cc: gitster, git, Jens.Lehmann

Stefan Beller wrote:

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  submodule-config.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6] Expose submodule parallelism to the user
  2016-02-09 21:46   ` Stefan Beller
@ 2016-02-09 22:03     ` Junio C Hamano
  2016-02-11 20:23       ` Junio C Hamano
  0 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 22:03 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Stefan Beller <sbeller@google.com> writes:

>>> * This seems to clash with 00/20] refs backend.
>>>> Applied this on top of a merge between the current 'master' and
>>>> 'sb/submodule-parallel-update' topic to untangle the dependency;
>>>> otherwise there is no way for this topic to make progress X-<.
>>>
>>> Anything I can do to help with easing the clash?
>>
>> Perhaps try to rebase the series on top of such a merge (with this
>> updated series) yourself and propose it as a basis for the next
>> reroll for David?  In short, working together with topic(s) that
>> touch the same area?
>
> Ok, I'll see if I can find a better commit to base this series on.

That is not what I meant.  I meant rebasing the refs-backend series
on top of a merge between this one and 'master', just like the way I
queued the refs-backend series on top of a merge between the
previous round of this series and 'master'.

These two topics want to update the same piece of code, so another
possibility is to rebase this series on top of a merge between
refs-backend and 'master', but the current iteration of refs-backend
already depends on the previous round of this topic.  Rebasing this
on top of refs-backend would involve first adjusting parts of
refs-backend that touched the same code as the previous round of
submodule-parallel-update touched so that refs-backend would work
directly on top of 'master', and then including the necessary change
to the refs-backend code while rebuilding submodule-parallel-update
on top of the result.  So I do not think you would go in that
direction.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 21:08   ` Junio C Hamano
@ 2016-02-09 22:19     ` Stefan Beller
  2016-02-09 22:29       ` Junio C Hamano
  0 siblings, 1 reply; 30+ messages in thread
From: Stefan Beller @ 2016-02-09 22:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Nieder, git, Jens Lehmann

On Tue, Feb 9, 2016 at 1:08 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> +     } else if (!strcmp(item.buf, "update")) {
>> +             if (!value)
>> +                     ret = config_error_nonbool(var);
>> +             else if (!me->overwrite &&
>> +                 submodule->update != SM_UPDATE_UNSPECIFIED)
>
> Funny indentation here (locally fixable).

I looked through the code base and reread our CodingGuidelines
to find out what is considered correct. (I assumed we had a gnu-ish
coding style w.r.t. breaking overly long lines in conditions, which is
having the next line be indented with 4 spaces.)

So I assume by funny you mean "the next line doesn't start below the
opening parenthesis"?

That would seem to be consistent as it fits both the 4 space indentation
which is always found below "if (", but we have more than 4 spaces in
other places such as overly long return statements, i.e. refs.c, l 610
(@origin/master)
        return starts_with(refname, "refs/heads/") ||
                starts_with(refname, "refs/remotes/") ||
                starts_with(refname, "refs/notes/") ||
                !strcmp(refname, "HEAD");

which also doesn't seem to align perfectly to me.

Looking for places, which have the pattern "else if (..." with line
break, I found several different styles.

builtin/mv.c (@origin/master) has two occurrences of

    else if (...
    <2 additional tabs> condition continues here

and another of

    else if (...
    <1 tab + 1 SP> condition nearly aligned to statement below

Looking at remote.c, I can find 1 tab, plus 3 spaces.
...
Giving up to come up with an idea for space based rule other than
"visually align it below the original statement".

Puzzled,
Stefan

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 22:19     ` Stefan Beller
@ 2016-02-09 22:29       ` Junio C Hamano
  0 siblings, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 22:29 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Stefan Beller <sbeller@google.com> writes:

> On Tue, Feb 9, 2016 at 1:08 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>> +     } else if (!strcmp(item.buf, "update")) {
>>> +             if (!value)
>>> +                     ret = config_error_nonbool(var);
>>> +             else if (!me->overwrite &&
>>> +                 submodule->update != SM_UPDATE_UNSPECIFIED)
>>
>> Funny indentation here (locally fixable).
>
> I looked through the code base and reread our CodingGuidelines
> to find out what is considered correct. (I assumed we had a gnu-ish
> coding style w.r.t. breaking overly long lines in conditions, which is
> having the next line be indented with 4 spaces.)
>
> So I assume by funny you mean "the next line doesn't start below the
> opening parenthesis"?

I think we typically do one of the two:

        if (A &&
            B && C && ...)

or

        if (A &&
                B && C && ...)

That is, the second line may align just inside the open paren on the
first line, or even deeper, but never shallower.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
  2016-02-09 21:08   ` Junio C Hamano
  2016-02-09 21:49   ` Jonathan Nieder
@ 2016-02-09 22:32   ` Junio C Hamano
  2016-02-11 20:00     ` Junio C Hamano
  2 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2016-02-09 22:32 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> +		else {
> +			submodule->update_command = NULL;
> +			if (!strcmp(value, "none"))
> +				submodule->update = SM_UPDATE_NONE;
> +			else if (!strcmp(value, "checkout"))
> +				submodule->update = SM_UPDATE_CHECKOUT;
> +			else if (!strcmp(value, "rebase"))
> +				submodule->update = SM_UPDATE_REBASE;
> +			else if (!strcmp(value, "merge"))
> +				submodule->update = SM_UPDATE_MERGE;
> +			else if (!skip_prefix(value, "!", &submodule->update_command))
> +				die(_("invalid value for %s"), var);
> +		}

I think this "string to enum" parser can become a separate helper
function in this patch, so that later patch can use it to parse the
"--update" option in the update_clone() function.

That would solve the slight inconsistency we have seen

+		if ((pp->update && !strcmp(pp->update, "none")) ||
+		    (!pp->update && sub->update == SM_UPDATE_NONE)) {

in that patch.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-09 20:54 ` [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
  2016-02-09 21:12   ` Junio C Hamano
@ 2016-02-09 22:34   ` Jonathan Nieder
  2016-02-10  0:11     ` Stefan Beller
  1 sibling, 1 reply; 30+ messages in thread
From: Jonathan Nieder @ 2016-02-09 22:34 UTC (permalink / raw)
  To: Stefan Beller; +Cc: gitster, git, Jens.Lehmann

Hi,

Stefan Beller wrote:

> +++ b/submodule.c
[...]
> @@ -169,7 +170,13 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
>  
>  int submodule_config(const char *var, const char *value, void *cb)
>  {
> -	if (starts_with(var, "submodule."))
> +	if (!strcmp(var, "submodule.fetchjobs")) {
> +		unsigned long val;
> +		if (!git_parse_ulong(value, &val) || 0 > val || val >= INT_MAX)
> +			die(_("Error parsing submodule.fetchJobs %s"), value);

'val < 0' would be more idiomatic than '0 > val'.  More importantly,
val is an unsigned long.  How could it be negative?

Is it intended that val == INT_MAX is not permitted?  I would have
expected something like the following to work:

		unsigned long val = git_config_ulong(var, value);
		if (val > INT_MAX) {
			errno = ERANGE;
			die_bad_number(var, value);
		}

(using die_bad_number from config.c).  Or config.c could gain a
git_config_nonnegative_int helper:

	static int git_parse_nonnegative_int(const char *value, int *ret)
	{
		uintmax_t tmp;
		if (!git_parse_unsigned(value, &tmp, maximum_signed_value_of_type(int)))
			return 0;
		*ret = tmp;
		return 1;
	}

	int git_config_nonnegative_int(const char *name, const char *value)
	{
		int ret;
		if (!git_parse_nonnegative_int(value, &ret))
			die_bad_number(name, value);
		return ret;
	}

allowing

		parallel_jobs = git_config_nonnegative_int(var, val);
		return 0;

[...]
> @@ -751,6 +758,9 @@ int fetch_populated_submodules(const struct argv_array *options,
>  	argv_array_push(&spf.args, "--recurse-submodules-default");
>  	/* default value, "--submodule-prefix" and its value are added later */
>  
> +	if (max_parallel_jobs < 0)
> +		max_parallel_jobs = parallel_jobs;

Makes sense.

[...]
> @@ -1097,3 +1107,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir)
>  	strbuf_release(&rel_path);
>  	free((void *)real_work_tree);
>  }
> +
> +int parallel_submodules(void)
> +{
> +	return parallel_jobs;
> +}

Is this helper used?

[...]
> +++ b/submodule.h
> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>  		struct string_list *needs_pushing);
>  int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>  void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
> +int parallel_submodules(void);

optional trick: one way to avoid merge conflicts is to add each function
at some logical place in the file instead of at the end.  Another nice
side-effect is that it makes it easier to read through the header since
functions appear in some appropriate order.

E.g. this method could go near the config functions, I suppose.

> --- a/t/t5526-fetch-submodules.sh
> +++ b/t/t5526-fetch-submodules.sh
> @@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea
>  	test_i18ncmp expect.err actual.err
>  '
>  
> +test_expect_success 'fetching submodules respects parallel settings' '

Makes sense.  Same trick about inserting in some appropriate place in
the middle of the file applies here, too.  In tests it also ends up
being useful for finding when tests overlap or when there's a gap in
coverage.

The documentation says that '0' does something appropriate (DWIM or
something?  I didn't understand it).  Perhaps a test for that behavior
would be useful, too.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-09 22:34   ` Jonathan Nieder
@ 2016-02-10  0:11     ` Stefan Beller
  2016-02-10  1:12       ` Junio C Hamano
  0 siblings, 1 reply; 30+ messages in thread
From: Stefan Beller @ 2016-02-10  0:11 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Junio C Hamano, git, Jens Lehmann

On Tue, Feb 9, 2016 at 2:34 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> Hi,
>
> Stefan Beller wrote:
>
>> +++ b/submodule.c
> [...]
>> @@ -169,7 +170,13 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
>>
>>  int submodule_config(const char *var, const char *value, void *cb)
>>  {
>> -     if (starts_with(var, "submodule."))
>> +     if (!strcmp(var, "submodule.fetchjobs")) {
>> +             unsigned long val;
>> +             if (!git_parse_ulong(value, &val) || 0 > val || val >= INT_MAX)
>> +                     die(_("Error parsing submodule.fetchJobs %s"), value);
>
> 'val < 0' would be more idiomatic than '0 > val'.  More importantly,
> val is an unsigned long.  How could it be negative?
>
> Is it intended that val == INT_MAX is not permitted?  I would have
> expected something like the following to work:
>
>                 unsigned long val = git_config_ulong(var, value);
>                 if (val > INT_MAX) {
>                         errno = ERANGE;
>                         die_bad_number(var, value);
>                 }
>
> (using die_bad_number from config.c).  Or config.c could gain a
> git_config_nonnegative_int helper:
>
>         static int git_parse_nonnegative_int(const char *value, int *ret)
>         {
>                 uintmax_t tmp;
>                 if (!git_parse_unsigned(value, &tmp, maximum_signed_value_of_type(int)))
>                         return 0;
>                 *ret = tmp;
>                 return 1;
>         }
>
>         int git_config_nonnegative_int(const char *name, const char *value)
>         {
>                 int ret;
>                 if (!git_parse_nonnegative_int(value, &ret))
>                         die_bad_number(name, value);
>                 return ret;
>         }
>
> allowing
>
>                 parallel_jobs = git_config_nonnegative_int(var, val);
>                 return 0;

And I thought we wanted to prevent inventing yet another helper?
Although this looks very appealing, so let's do that instead.

>
> [...]
>> @@ -751,6 +758,9 @@ int fetch_populated_submodules(const struct argv_array *options,
>>       argv_array_push(&spf.args, "--recurse-submodules-default");
>>       /* default value, "--submodule-prefix" and its value are added later */
>>
>> +     if (max_parallel_jobs < 0)
>> +             max_parallel_jobs = parallel_jobs;
>
> Makes sense.
>
> [...]
>> @@ -1097,3 +1107,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir)
>>       strbuf_release(&rel_path);
>>       free((void *)real_work_tree);
>>  }
>> +
>> +int parallel_submodules(void)
>> +{
>> +     return parallel_jobs;
>> +}
>
> Is this helper used?

Not yet, but I rather want to introduce it here as it is easier to review here
than in a later patch?

>
> [...]
>> +++ b/submodule.h
>> @@ -41,5 +41,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam
>>               struct string_list *needs_pushing);
>>  int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name);
>>  void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir);
>> +int parallel_submodules(void);
>
> optional trick: one way to avoid merge conflicts is to add each function
> at some logical place in the file instead of at the end.  Another nice
> side-effect is that it makes it easier to read through the header since
> functions appear in some appropriate order.
>
> E.g. this method could go near the config functions, I suppose.

I was not sure how relevant this is to configuration as it seems a bit
special w.r.t. submodules to me (it's not configured in .gitmodules, but a
standard configuration in .git/config after all).

>
>> --- a/t/t5526-fetch-submodules.sh
>> +++ b/t/t5526-fetch-submodules.sh
>> @@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea
>>       test_i18ncmp expect.err actual.err
>>  '
>>
>> +test_expect_success 'fetching submodules respects parallel settings' '
>
> Makes sense.  Same trick about inserting in some appropriate place in
> the middle of the file applies here, too.  In tests it also ends up
> being useful for finding when tests overlap or when there's a gap in
> coverage.
>
> The documentation says that '0' does something appropriate (DWIM or
> something?  I didn't understand it).  Perhaps a test for that behavior
> would be useful, too.

The 0 is currently using 'online_cpus()' by default as a 'something
appropriate',
which is not really appropriate though. The documentation is worded
such that people should not rely on certain behaviors of '0'.

Maybe instead we could just hardcode 0 to 2 for now and test for that
instead. My reasoning for '2':
* provides the least amount of parallelism
* when fetching/cloning you have phases when the server works or when
   the client works. In absence of any good metric, I estimate the work load
   to be split 50:50. If that is the case the actual optimal number is 2 with
   these two jobs being shifted such that each client and server are
   loaded all the time.


>
> Thanks,
> Jonathan

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
  2016-02-09 21:24   ` Junio C Hamano
@ 2016-02-10  0:37   ` Jonathan Nieder
  2016-02-10  2:26   ` Jacob Keller
  2 siblings, 0 replies; 30+ messages in thread
From: Jonathan Nieder @ 2016-02-10  0:37 UTC (permalink / raw)
  To: Stefan Beller; +Cc: gitster, git, Jens.Lehmann

Stefan Beller wrote:

> This introduces a new helper function in git submodule--helper
> which takes care of cloning all submodules, which we want to
> parallelize eventually.

Neat.

[...]
> As we can only access the stderr channel from within the parallel
> processing engine, we need to reroute the error message for
> specified but initialized submodules to stderr. As it is an error
> message, this should have gone to stderr in the first place, so it
> is a bug fix along the way.

In principle this could have happened in a separate preparatory patch
(so that the move to C would have no functional effect) but I don't
think that benefit alone is worth the churn of going back and doing
that.

[...]
> +++ b/builtin/submodule--helper.c
> @@ -255,6 +255,235 @@ static int module_clone(int argc, const char **argv, const char *prefix)
>  	return 0;
>  }
>  
> +static int git_submodule_config(const char *var, const char *value, void *cb)
> +{
> +	return submodule_config(var, value, cb);
> +}

Can callers use submodule_config directly?


> +struct submodule_update_clone {
> +	/* states */
> +	int count;
> +	int print_unmatched;

I'd add a blank line here.

> +	/* configuration */
> +	int quiet;
> +	const char *reference;
> +	const char *depth;
> +	const char *update;
> +	const char *recursive_prefix;
> +	const char *prefix;
> +	struct module_list list;
> +	struct string_list projectlines;
> +	struct pathspec pathspec;
> +};
> +#define SUBMODULE_UPDATE_CLONE_INIT {0, 0, 0, NULL, NULL, NULL, NULL, NULL, MODULE_LIST_INIT, STRING_LIST_INIT_DUP}

What is this struct used for?  Maybe this would be clearer if it
appeared immediately before the function that used it.

This is the shared cb object passed to run_processes_parallel, right?
Some name like parallel_clone_opts or parallel_clone_cb might work.

What do the fields represent?  E.g., what is count a count of, what
does it mean when print_unmatched is true, etc?

Would it make sense to put the options in a separate struct from the
state fields (so e.g. it could be const)?  The options are easier to
understand because they correspond to command-line options, while the
state fields are something different and internal.

[...]
> +static int update_clone_get_next_task(struct child_process *cp,
> +				      struct strbuf *err,
> +				      void *pp_cb,
> +				      void **pp_task_cb)
> +{
> +	struct submodule_update_clone *pp = pp_cb;
> +
> +	for (; pp->count < pp->list.nr; pp->count++) {

Could some of this loop body be factored out into separate functions?
(e.g. whether to skip a submodule, getting the display path, ...).

[...]
> +		/*
> +		 * Looking up the url in .git/config.
> +		 * We must not fall back to .gitmodules as we only want
> +		 * to process configured submodules.
> +		 */
> +		strbuf_reset(&sb);
> +		strbuf_addf(&sb, "submodule.%s.url", sub->name);
> +		git_config_get_string(sb.buf, &url);
> +		if (!url) {

I can see how the submodule API would be overkill for this.  But it's
still tempting to use it.  'struct submodule' could gain a field
representing whether the submodule is initialized (i.e., whether it
appears in .git/config).

I wonder whether the strbuf_reset + addf idiom would be a good thing
to factor out into a mkpath()-like function --- i.e., something like

		git_config_get_string(fmt(&sb, "submodule.%s.url", sub->name), &url);

That's a little less risky than mkpath was because it explicitly
mentions &sb but maybe it's too magical.

[...]
> +static int update_clone_start_failure(struct child_process *cp,

Will review the rest when I get home.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-10  0:11     ` Stefan Beller
@ 2016-02-10  1:12       ` Junio C Hamano
  2016-02-10  2:04         ` Junio C Hamano
  0 siblings, 1 reply; 30+ messages in thread
From: Junio C Hamano @ 2016-02-10  1:12 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Stefan Beller <sbeller@google.com> writes:

>>         int git_config_nonnegative_int(const char *name, const char *value)
>>         {
>>                 int ret;
>>                 if (!git_parse_nonnegative_int(value, &ret))
>>                         die_bad_number(name, value);
>>                 return ret;
>>         }
>>
>> allowing
>>
>>                 parallel_jobs = git_config_nonnegative_int(var, val);
>>                 return 0;
>
> And I thought we wanted to prevent inventing yet another helper?

I actually do not think we mind git_parse_int(), git_parse_long(),
and git_parse_uint() to complement git_parse_ulong().  I am not
enthused by the "nonnegative-int" thing, though.

Do we have enough cases where we want to use signed type and reserve
negative value for our own internal use (e.g. "unspecified yet")?
If not, a very generic git_config_int() with a caller specific range
check wouldn't look _so_ bad.

	parallel_jobs = git_config_int(var, val);
        if (parallel_jobs < 0)
	     	some corrective action;
	return 0;

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-10  1:12       ` Junio C Hamano
@ 2016-02-10  2:04         ` Junio C Hamano
  0 siblings, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-10  2:04 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Junio C Hamano <gitster@pobox.com> writes:

> I actually do not think we mind git_parse_int(), git_parse_long(),
> and git_parse_uint() to complement git_parse_ulong().  I am not
> enthused by the "nonnegative-int" thing, though.
>
> Do we have enough cases where we want to use signed type and reserve
> negative value for our own internal use (e.g. "unspecified yet")?
> If not, a very generic git_config_int() with a caller specific range
> check wouldn't look _so_ bad.
>
> 	parallel_jobs = git_config_int(var, val);
>         if (parallel_jobs < 0)
> 	     	some corrective action;
> 	return 0;

And of course, if we _do_ have many callsites where the caller wants
a sub-range of the range allowed by the underlying type, we can have

 	<T> git_config_<T>_bounded(var, val, <T> lb, <T> ub)

where T are the usual integral types and lb and ub are (inclusive)
lower and upper bounds.  With that, Jonathan's non-negative-int
would fall out as a natural consequence:

	parallel_jobs = git_config_int_bounded(var, val, 0, INT_MAX);

I vaguely recall that Michael Haggerty had a series to add and use
helpers to parse values of integral types and something like that
interface was suggested in the discussion; and it may even have used
a word better than "bounded".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
  2016-02-09 21:24   ` Junio C Hamano
  2016-02-10  0:37   ` Jonathan Nieder
@ 2016-02-10  2:26   ` Jacob Keller
  2016-02-10 17:49     ` Stefan Beller
  2 siblings, 1 reply; 30+ messages in thread
From: Jacob Keller @ 2016-02-10  2:26 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Junio C Hamano, Jonathan Nieder, Git mailing list, Jens Lehmann

On Tue, Feb 9, 2016 at 12:54 PM, Stefan Beller <sbeller@google.com> wrote:
> This introduces a new helper function in git submodule--helper
> which takes care of cloning all submodules, which we want to
> parallelize eventually.
>
> Some tests (such as empty URL, update_mode=none) are required in the
> helper to make the decision for cloning. These checks have been
> moved into the C function as well (no need to repeat them in the
> shell script).
>
> As we can only access the stderr channel from within the parallel
> processing engine, we need to reroute the error message for
> specified but initialized submodules to stderr. As it is an error
> message, this should have gone to stderr in the first place, so it
> is a bug fix along the way.
>

I was recently working on a way to make submodule pass some parameters
from the parent project into the submodule project (specifically
settings regarding authentication such as using an authentication
helper), and I think this would make it easier to do.

Thanks,
Jake

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-10  2:26   ` Jacob Keller
@ 2016-02-10 17:49     ` Stefan Beller
  2016-02-11  7:46       ` Jacob Keller
  0 siblings, 1 reply; 30+ messages in thread
From: Stefan Beller @ 2016-02-10 17:49 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Junio C Hamano, Jonathan Nieder, Git mailing list, Jens Lehmann

On Tue, Feb 9, 2016 at 6:26 PM, Jacob Keller <jacob.keller@gmail.com> wrote:
> On Tue, Feb 9, 2016 at 12:54 PM, Stefan Beller <sbeller@google.com> wrote:
>> This introduces a new helper function in git submodule--helper
>> which takes care of cloning all submodules, which we want to
>> parallelize eventually.
>>
>> Some tests (such as empty URL, update_mode=none) are required in the
>> helper to make the decision for cloning. These checks have been
>> moved into the C function as well (no need to repeat them in the
>> shell script).
>>
>> As we can only access the stderr channel from within the parallel
>> processing engine, we need to reroute the error message for
>> specified but initialized submodules to stderr. As it is an error
>> message, this should have gone to stderr in the first place, so it
>> is a bug fix along the way.
>>
>
> I was recently working on a way to make submodule pass some parameters
> from the parent project into the submodule project (specifically
> settings regarding authentication such as using an authentication
> helper), and I think this would make it easier to do.

Do you mean the separate bug fix patch as proposed by Jonathan or
this patch in general helps your idea of passing settings?

Thanks,
Stefan

>
> Thanks,
> Jake

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning
  2016-02-10 17:49     ` Stefan Beller
@ 2016-02-11  7:46       ` Jacob Keller
  0 siblings, 0 replies; 30+ messages in thread
From: Jacob Keller @ 2016-02-11  7:46 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Junio C Hamano, Jonathan Nieder, Git mailing list, Jens Lehmann

On Wed, Feb 10, 2016 at 9:49 AM, Stefan Beller <sbeller@google.com> wrote:
> Do you mean the separate bug fix patch as proposed by Jonathan or
> this patch in general helps your idea of passing settings?
>
> Thanks,
> Stefan

This patch in general, I think.

I didn't mean to reply to the whole thread.

Thanks,
Jake

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 1/6] submodule-config: keep update strategy around
  2016-02-09 22:32   ` Junio C Hamano
@ 2016-02-11 20:00     ` Junio C Hamano
  0 siblings, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-11 20:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jrnieder, git, Jens.Lehmann

Junio C Hamano <gitster@pobox.com> writes:

> Stefan Beller <sbeller@google.com> writes:
>
>> +		else {
>> +			submodule->update_command = NULL;
>> +			if (!strcmp(value, "none"))
>> +				submodule->update = SM_UPDATE_NONE;
>> +			else if (!strcmp(value, "checkout"))
>> +				submodule->update = SM_UPDATE_CHECKOUT;
>> +			else if (!strcmp(value, "rebase"))
>> +				submodule->update = SM_UPDATE_REBASE;
>> +			else if (!strcmp(value, "merge"))
>> +				submodule->update = SM_UPDATE_MERGE;
>> +			else if (!skip_prefix(value, "!", &submodule->update_command))
>> +				die(_("invalid value for %s"), var);
>> +		}
>
> I think this "string to enum" parser can become a separate helper
> function in this patch, so that later patch can use it to parse the
> "--update" option in the update_clone() function.
>
> That would solve the slight inconsistency we have seen
>
> +		if ((pp->update && !strcmp(pp->update, "none")) ||
> +		    (!pp->update && sub->update == SM_UPDATE_NONE)) {
>
> in that patch.

In addition, you would eventually want the reverse translation,
i.e. SM_UPDATE_ENUM to string, when you rebase the second patch of
your sb/submodule-init series (I just tried it myself and realized
that a patch to refactor the above part would be the best place to
add such a helper).

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6] Expose submodule parallelism to the user
  2016-02-09 22:03     ` Junio C Hamano
@ 2016-02-11 20:23       ` Junio C Hamano
  2016-02-11 20:28         ` Junio C Hamano
  2016-02-11 20:33         ` Stefan Beller
  0 siblings, 2 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-11 20:23 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Junio C Hamano <gitster@pobox.com> writes:

> Stefan Beller <sbeller@google.com> writes:
>
>>>> * This seems to clash with 00/20] refs backend.
>>>>> Applied this on top of a merge between the current 'master' and
>>>>> 'sb/submodule-parallel-update' topic to untangle the dependency;
>>>>> otherwise there is no way for this topic to make progress X-<.
>>>>
>>>> Anything I can do to help with easing the clash?
>>>
>>> Perhaps try to rebase the series on top of such a merge (with this
>>> updated series) yourself and propose it as a basis for the next
>>> reroll for David?  In short, working together with topic(s) that
>>> touch the same area?
>>
>> Ok, I'll see if I can find a better commit to base this series on.
>
> That is not what I meant.  I meant rebasing the refs-backend series
> on top of a merge between this one and 'master', just like the way I
> queued the refs-backend series on top of a merge between the
> previous round of this series and 'master'.
>
> These two topics want to update the same piece of code, so another
> possibility is to rebase this series on top of a merge between
> refs-backend and 'master', but the current iteration of refs-backend
> already depends on the previous round of this topic.  Rebasing this
> on top of refs-backend would involve first adjusting parts of
> refs-backend that touched the same code as the previous round of
> submodule-parallel-update touched so that refs-backend would work
> directly on top of 'master', and then including the necessary change
> to the refs-backend code while rebuilding submodule-parallel-update
> on top of the result.  So I do not think you would go in that
> direction.

Having said that, at least for this round, I do not think there is
nothing to do at this point on your end; I just created a merge
between master and your updated sb/submodule-parallel-update and
then rebased the LMDB series on top of it.  It at least applies
cleanly and I expect it would test OK as well (the test is still
running).

On your plate is to adjust the submodule-init topic so that it knows
that the .update field no longer is a string (but is now an enum).

I did try doing that myself to see the extent of necessary changes
but did not finish it myself, because I suspect that
sb/submodule-parallel-update may need further updates.

Thanks.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6] Expose submodule parallelism to the user
  2016-02-11 20:23       ` Junio C Hamano
@ 2016-02-11 20:28         ` Junio C Hamano
  2016-02-11 20:33         ` Stefan Beller
  1 sibling, 0 replies; 30+ messages in thread
From: Junio C Hamano @ 2016-02-11 20:28 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Jens Lehmann

Junio C Hamano <gitster@pobox.com> writes:

> Having said that, at least for this round, I do not think there is
> nothing to do at this point on your end;...

I obviously meant "I think there is nothing" here.  Sorry.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCHv9 0/6] Expose submodule parallelism to the user
  2016-02-11 20:23       ` Junio C Hamano
  2016-02-11 20:28         ` Junio C Hamano
@ 2016-02-11 20:33         ` Stefan Beller
  1 sibling, 0 replies; 30+ messages in thread
From: Stefan Beller @ 2016-02-11 20:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jonathan Nieder, git, Jens Lehmann

On Thu, Feb 11, 2016 at 12:23 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
>> Stefan Beller <sbeller@google.com> writes:
>>
>>>>> * This seems to clash with 00/20] refs backend.
>>>>>> Applied this on top of a merge between the current 'master' and
>>>>>> 'sb/submodule-parallel-update' topic to untangle the dependency;
>>>>>> otherwise there is no way for this topic to make progress X-<.
>>>>>
>>>>> Anything I can do to help with easing the clash?
>>>>
>>>> Perhaps try to rebase the series on top of such a merge (with this
>>>> updated series) yourself and propose it as a basis for the next
>>>> reroll for David?  In short, working together with topic(s) that
>>>> touch the same area?
>>>
>>> Ok, I'll see if I can find a better commit to base this series on.
>>
>> That is not what I meant.  I meant rebasing the refs-backend series
>> on top of a merge between this one and 'master', just like the way I
>> queued the refs-backend series on top of a merge between the
>> previous round of this series and 'master'.
>>
>> These two topics want to update the same piece of code, so another
>> possibility is to rebase this series on top of a merge between
>> refs-backend and 'master', but the current iteration of refs-backend
>> already depends on the previous round of this topic.  Rebasing this
>> on top of refs-backend would involve first adjusting parts of
>> refs-backend that touched the same code as the previous round of
>> submodule-parallel-update touched so that refs-backend would work
>> directly on top of 'master', and then including the necessary change
>> to the refs-backend code while rebuilding submodule-parallel-update
>> on top of the result.  So I do not think you would go in that
>> direction.
>
> Having said that, at least for this round, I do not think there is
> nothing to do at this point on your end; I just created a merge
> between master and your updated sb/submodule-parallel-update and
> then rebased the LMDB series on top of it.  It at least applies
> cleanly and I expect it would test OK as well (the test is still
> running).

I was about to send another round of this series with all the discussion
addressed and then take a look how to resolve any conflicts if any.

This sounds promising.

>
> On your plate is to adjust the submodule-init topic so that it knows
> that the .update field no longer is a string (but is now an enum).

After the reroll of this series.

>
> I did try doing that myself to see the extent of necessary changes
> but did not finish it myself, because I suspect that
> sb/submodule-parallel-update may need further updates.

I would hope to address that all within the next round, as the review
discussion seemed to have died down and I'll be fixing all the issues
pointed at.

Thanks,
Stefan

>
> Thanks.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2016-02-11 20:33 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-09 20:54 [PATCHv9 0/6] Expose submodule parallelism to the user Stefan Beller
2016-02-09 20:54 ` [PATCHv9 1/6] submodule-config: keep update strategy around Stefan Beller
2016-02-09 21:08   ` Junio C Hamano
2016-02-09 22:19     ` Stefan Beller
2016-02-09 22:29       ` Junio C Hamano
2016-02-09 21:49   ` Jonathan Nieder
2016-02-09 22:32   ` Junio C Hamano
2016-02-11 20:00     ` Junio C Hamano
2016-02-09 20:54 ` [PATCHv9 2/6] submodule-config: drop check against NULL Stefan Beller
2016-02-09 21:50   ` Jonathan Nieder
2016-02-09 20:54 ` [PATCHv9 3/6] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
2016-02-09 21:12   ` Junio C Hamano
2016-02-09 22:34   ` Jonathan Nieder
2016-02-10  0:11     ` Stefan Beller
2016-02-10  1:12       ` Junio C Hamano
2016-02-10  2:04         ` Junio C Hamano
2016-02-09 20:54 ` [PATCHv9 4/6] git submodule update: have a dedicated helper for cloning Stefan Beller
2016-02-09 21:24   ` Junio C Hamano
2016-02-10  0:37   ` Jonathan Nieder
2016-02-10  2:26   ` Jacob Keller
2016-02-10 17:49     ` Stefan Beller
2016-02-11  7:46       ` Jacob Keller
2016-02-09 20:54 ` [PATCHv9 5/6] submodule update: expose parallelism to the user Stefan Beller
2016-02-09 20:54 ` [PATCHv9 6/6] clone: allow an explicit argument for parallel submodule clones Stefan Beller
2016-02-09 21:39 ` [PATCHv9 0/6] Expose submodule parallelism to the user Junio C Hamano
2016-02-09 21:46   ` Stefan Beller
2016-02-09 22:03     ` Junio C Hamano
2016-02-11 20:23       ` Junio C Hamano
2016-02-11 20:28         ` Junio C Hamano
2016-02-11 20:33         ` Stefan Beller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.