All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv8 0/9] Expose submodule parallelism to the user
@ 2016-02-04 22:09 Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 1/9] submodule-config: keep update strategy around Stefan Beller
                   ` (8 more replies)
  0 siblings, 9 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

This replaces sb/submodule-parallel-update.
(which is based on sb/submodule-parallel-fetch,
but this series also applies cleanly on master)

* using a "more" correct version of parsing the section title
* fixed the memleaks, free-after-use in
  "git submodule update: have a dedicated helper for cloning"
* reworded documentation.
* another additional patch snuck in, which makes code a bit more readable
  (0004-submodule-config-slightly-simplify-lookup_or_create_.patch)

Thanks,
Stefan

Interdiff to v7:

diff --git a/Documentation/config.txt b/Documentation/config.txt
index eda3276..3b02732 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2647,11 +2647,10 @@ submodule.<name>.ignore::
 	affected by this setting.
 
 submodule.fetchJobs::
-	This is used to determine how many submodules will be
-	fetched/cloned at the same time. Specifying a positive integer
-	allows up to that number of submodules being fetched in parallel.
-	This is used in fetch and clone operations only. A value of 0 will
-	give some reasonable configuration. It defaults to 1.
+	Specifies how many submodules are fetched/cloned at the same time.
+	A positive integer allows up to that number of submodules fetched
+	in parallel. A value of 0 will give some reasonable default.
+	If unset, it defaults to 1.
 
 tag.sort::
 	This variable controls the sort ordering of tags when displayed by
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 8002187..7e01b85 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -312,33 +312,31 @@ static int update_clone_get_next_task(struct child_process *cp,
 
 	for (; pp->count < pp->list.nr; pp->count++) {
 		const struct submodule *sub = NULL;
-		const char *displaypath = NULL;
 		const struct cache_entry *ce = pp->list.entries[pp->count];
+		struct strbuf displaypath_sb = STRBUF_INIT;
 		struct strbuf sb = STRBUF_INIT;
+		const char *displaypath = NULL;
 		const char *update_module = NULL;
 		char *url = NULL;
 		int needs_cloning = 0;
 
 		if (ce_stage(ce)) {
 			if (pp->recursive_prefix)
-				strbuf_addf(err, "Skipping unmerged submodule %s/%s\n",
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s/%s\n",
 					pp->recursive_prefix, ce->name);
 			else
-				strbuf_addf(err, "Skipping unmerged submodule %s\n",
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s\n",
 					ce->name);
-			continue;
+			goto cleanup_and_continue;
 		}
 
 		sub = submodule_from_path(null_sha1, ce->name);
-		if (!sub) {
-			strbuf_addf(err, "BUG: internal error managing submodules. "
-				    "The cache could not locate '%s'", ce->name);
-			pp->print_unmatched = 1;
-			continue;
-		}
 
 		if (pp->recursive_prefix)
-			displaypath = relative_path(pp->recursive_prefix, ce->name, &sb);
+			displaypath = relative_path(pp->recursive_prefix,
+						    ce->name, &displaypath_sb);
 		else
 			displaypath = ce->name;
 
@@ -349,14 +347,15 @@ static int update_clone_get_next_task(struct child_process *cp,
 		if (!update_module)
 			update_module = "checkout";
 		if (!strcmp(update_module, "none")) {
-			strbuf_addf(err, "Skipping submodule '%s'\n", displaypath);
-			continue;
+			strbuf_addf(err, "Skipping submodule '%s'\n",
+				    displaypath);
+			goto cleanup_and_continue;
 		}
 
 		/*
 		 * Looking up the url in .git/config.
-		 * We must not fall back to .gitmodules as we only want to process
-		 * configured submodules.
+		 * We must not fall back to .gitmodules as we only want
+		 * to process configured submodules.
 		 */
 		strbuf_reset(&sb);
 		strbuf_addf(&sb, "submodule.%s.url", sub->name);
@@ -367,9 +366,11 @@ static int update_clone_get_next_task(struct child_process *cp,
 			 * path have been specified
 			 */
 			if (pp->pathspec.nr)
-				strbuf_addf(err, _("Submodule path '%s' not initialized\n"
-					"Maybe you want to use 'update --init'?"), displaypath);
-			continue;
+				strbuf_addf(err,
+					    _("Submodule path '%s' not initialized\n"
+					    "Maybe you want to use 'update --init'?"),
+					    displaypath);
+			goto cleanup_and_continue;
 		}
 
 		strbuf_reset(&sb);
@@ -383,13 +384,19 @@ static int update_clone_get_next_task(struct child_process *cp,
 		string_list_append(&pp->projectlines, sb.buf);
 
 		if (needs_cloning) {
-			fill_clone_command(cp, pp->quiet, pp->prefix, ce->name,
-					   sub->name, url, pp->reference, pp->depth);
+			fill_clone_command(cp, pp->quiet, pp->prefix,
+					   ce->name, sub->name, strdup(url),
+					   pp->reference, pp->depth);
 			pp->count++;
-			free(url);
+		}
+
+cleanup_and_continue:
+		free(url);
+		strbuf_reset(&displaypath_sb);
+		strbuf_reset(&sb);
+
+		if (needs_cloning)
 			return 1;
-		} else
-			free(url);
 	}
 	return 0;
 }
diff --git a/submodule-config.c b/submodule-config.c
index a32259e..339b59d 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -32,7 +32,7 @@ enum lookup_type {
 
 static struct submodule_cache cache;
 static int is_cache_init;
-static int parallel_jobs = -1;
+static unsigned long parallel_jobs = -1;
 
 static int config_path_cmp(const struct submodule_entry *a,
 			   const struct submodule_entry *b,
@@ -163,22 +163,17 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache,
 }
 
 static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
-						  const unsigned char *gitmodules_sha1,
-						  const char *name_ptr, int name_len)
+		const unsigned char *gitmodules_sha1, const char *name)
 {
 	struct submodule *submodule;
-	struct strbuf name_buf = STRBUF_INIT;
-	char *name = xmemdupz(name_ptr, name_len);
 
 	submodule = cache_lookup_name(cache, gitmodules_sha1, name);
 	if (submodule)
-		goto out;
+		return submodule;
 
 	submodule = xmalloc(sizeof(*submodule));
 
-	strbuf_addstr(&name_buf, name);
-	submodule->name = strbuf_detach(&name_buf, NULL);
-
+	submodule->name = xstrdup(name);
 	submodule->path = NULL;
 	submodule->url = NULL;
 	submodule->update = NULL;
@@ -188,8 +183,7 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 	hashcpy(submodule->gitmodules_sha1, gitmodules_sha1);
 
 	cache_add(cache, submodule);
-out:
-	free(name);
+
 	return submodule;
 }
 
@@ -241,18 +235,16 @@ static int parse_generic_submodule_config(const char *key,
 					  struct parse_config_parameter *me)
 {
 	if (!strcmp(key, "fetchjobs")) {
-		parallel_jobs = strtol(value, NULL, 10);
-		if (parallel_jobs < 0) {
-			warning("submodule.fetchJobs not allowed to be negative.");
+		if (!git_parse_ulong(value, &parallel_jobs)) {
+			warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
 			parallel_jobs = 1;
-			return 1;
 		}
 	}
 
 	return 0;
 }
 
-static int parse_specific_submodule_config(const char *subsection, int subsection_len,
+static int parse_specific_submodule_config(const char *subsection,
 					   const char *key,
 					   const char *var,
 					   const char *value,
@@ -262,7 +254,7 @@ static int parse_specific_submodule_config(const char *subsection, int subsectio
 	struct submodule *submodule;
 	submodule = lookup_or_create_by_name(me->cache,
 					     me->gitmodules_sha1,
-					     subsection, subsection_len);
+					     subsection);
 
 	if (!strcmp(key, "path")) {
 		if (!value)
@@ -332,19 +324,22 @@ static int parse_specific_submodule_config(const char *subsection, int subsectio
 static int parse_config(const char *var, const char *value, void *data)
 {
 	struct parse_config_parameter *me = data;
-	int subsection_len;
+	int subsection_len, ret;
 	const char *subsection, *key;
+	char *sub;
 
 	if (parse_config_key(var, "submodule", &subsection,
 			     &subsection_len, &key) < 0)
 		return 0;
 
-	if (!subsection_len)
+	if (!subsection)
 		return parse_generic_submodule_config(key, var, value, me);
-	else
-		return parse_specific_submodule_config(subsection,
-						       subsection_len, key,
-						       var, value, me);
+
+	sub = xmemdupz(subsection, subsection_len);
+	ret = parse_specific_submodule_config(sub, key,
+					      var, value, me);
+	free(sub);
+	return ret;
 }
 
 static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
@@ -493,7 +488,7 @@ void submodule_free(void)
 	is_cache_init = 0;
 }
 
-int config_parallel_submodules(void)
+unsigned long config_parallel_submodules(void)
 {
 	return parallel_jobs;
 }
diff --git a/submodule-config.h b/submodule-config.h
index d9bbf9a..bab1e8d 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -27,6 +27,6 @@ const struct submodule *submodule_from_path(const unsigned char *commit_sha1,
 		const char *path);
 void submodule_free(void);
 
-int config_parallel_submodules(void);
+unsigned long config_parallel_submodules(void);
 
 #endif /* SUBMODULE_CONFIG_H */


Stefan Beller (9):
  submodule-config: keep update strategy around
  submodule-config: drop check against NULL
  submodule-config: remove name_and_item_from_var
  submodule-config: slightly simplify lookup_or_create_by_name
  submodule-config: introduce parse_generic_submodule_config
  fetching submodules: respect `submodule.fetchJobs` config option
  git submodule update: have a dedicated helper for cloning
  submodule update: expose parallelism to the user
  clone: allow an explicit argument for parallel submodule clones

 Documentation/config.txt        |   6 +
 Documentation/git-clone.txt     |   6 +-
 Documentation/git-submodule.txt |   7 +-
 builtin/clone.c                 |  19 +++-
 builtin/fetch.c                 |   2 +-
 builtin/submodule--helper.c     | 246 ++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh                |  54 ++++-----
 submodule-config.c              | 104 ++++++++++-------
 submodule-config.h              |   3 +
 submodule.c                     |   5 +
 t/t5526-fetch-submodules.sh     |  14 +++
 t/t7400-submodule-basic.sh      |   4 +-
 t/t7406-submodule-update.sh     |  27 +++++
 13 files changed, 414 insertions(+), 83 deletions(-)

-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 1/9] submodule-config: keep update strategy around
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-05  0:59   ` Jonathan Nieder
  2016-02-04 22:09 ` [PATCHv8 2/9] submodule-config: drop check against NULL Stefan Beller
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

We need the submodule update strategies in a later patch.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 submodule-config.c | 11 +++++++++++
 submodule-config.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/submodule-config.c b/submodule-config.c
index afe0ea8..4239b0e 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -194,6 +194,7 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 
 	submodule->path = NULL;
 	submodule->url = NULL;
+	submodule->update = NULL;
 	submodule->fetch_recurse = RECURSE_SUBMODULES_NONE;
 	submodule->ignore = NULL;
 
@@ -311,6 +312,16 @@ static int parse_config(const char *var, const char *value, void *data)
 			free((void *) submodule->url);
 			submodule->url = xstrdup(value);
 		}
+	} else if (!strcmp(item.buf, "update")) {
+		if (!value)
+			ret = config_error_nonbool(var);
+		else if (!me->overwrite && submodule->update != NULL)
+			warn_multiple_config(me->commit_sha1, submodule->name,
+					     "update");
+		else {
+			free((void *) submodule->update);
+			submodule->update = xstrdup(value);
+		}
 	}
 
 	strbuf_release(&name);
diff --git a/submodule-config.h b/submodule-config.h
index 9061e4e..f9e2a29 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -14,6 +14,7 @@ struct submodule {
 	const char *url;
 	int fetch_recurse;
 	const char *ignore;
+	const char *update;
 	/* the sha1 blob id of the responsible .gitmodules file */
 	unsigned char gitmodules_sha1[20];
 };
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 2/9] submodule-config: drop check against NULL
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 1/9] submodule-config: keep update strategy around Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-05  1:05   ` Jonathan Nieder
  2016-02-04 22:09 ` [PATCHv8 3/9] submodule-config: remove name_and_item_from_var Stefan Beller
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

Adhere to the common coding style of Git and not check explicitly
for NULL throughout the file. There are still other occurrences in the
code base but that is usually inside of conditions with side effects.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index 4239b0e..6d01941 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -265,7 +265,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	if (!strcmp(item.buf, "path")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->path != NULL)
+		else if (!me->overwrite && submodule->path)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"path");
 		else {
@@ -289,7 +289,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	} else if (!strcmp(item.buf, "ignore")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->ignore != NULL)
+		else if (!me->overwrite && submodule->ignore)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"ignore");
 		else if (strcmp(value, "untracked") &&
@@ -305,7 +305,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	} else if (!strcmp(item.buf, "url")) {
 		if (!value) {
 			ret = config_error_nonbool(var);
-		} else if (!me->overwrite && submodule->url != NULL) {
+		} else if (!me->overwrite && submodule->url) {
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					"url");
 		} else {
@@ -315,7 +315,7 @@ static int parse_config(const char *var, const char *value, void *data)
 	} else if (!strcmp(item.buf, "update")) {
 		if (!value)
 			ret = config_error_nonbool(var);
-		else if (!me->overwrite && submodule->update != NULL)
+		else if (!me->overwrite && submodule->update)
 			warn_multiple_config(me->commit_sha1, submodule->name,
 					     "update");
 		else {
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 3/9] submodule-config: remove name_and_item_from_var
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 1/9] submodule-config: keep update strategy around Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 2/9] submodule-config: drop check against NULL Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-06  0:46   ` Jonathan Nieder
  2016-02-04 22:09 ` [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name Stefan Beller
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

`name_and_item_from_var` does not provide the proper abstraction
we need here in a later patch. As we have only a single call site
for `name_and_item_from_var`, just inline it. Instead of using
strbufs, use direct char *.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 40 +++++++++++-----------------------------
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index 6d01941..c08ee7f 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -161,22 +161,6 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache,
 	return NULL;
 }
 
-static int name_and_item_from_var(const char *var, struct strbuf *name,
-				  struct strbuf *item)
-{
-	const char *subsection, *key;
-	int subsection_len, parse;
-	parse = parse_config_key(var, "submodule", &subsection,
-			&subsection_len, &key);
-	if (parse < 0 || !subsection)
-		return 0;
-
-	strbuf_add(name, subsection, subsection_len);
-	strbuf_addstr(item, key);
-
-	return 1;
-}
-
 static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 		const unsigned char *gitmodules_sha1, const char *name)
 {
@@ -251,18 +235,19 @@ static int parse_config(const char *var, const char *value, void *data)
 {
 	struct parse_config_parameter *me = data;
 	struct submodule *submodule;
-	struct strbuf name = STRBUF_INIT, item = STRBUF_INIT;
-	int ret = 0;
+	int subsection_len, ret = 0;
+	const char *subsection, *key;
 
-	/* this also ensures that we only parse submodule entries */
-	if (!name_and_item_from_var(var, &name, &item))
+	if (parse_config_key(var, "submodule", &subsection,
+			     &subsection_len, &key) < 0 || !subsection)
 		return 0;
 
+	subsection = xmemdupz(subsection, subsection_len);
 	submodule = lookup_or_create_by_name(me->cache,
 					     me->gitmodules_sha1,
-					     name.buf);
+					     subsection);
 
-	if (!strcmp(item.buf, "path")) {
+	if (!strcmp(key, "path")) {
 		if (!value)
 			ret = config_error_nonbool(var);
 		else if (!me->overwrite && submodule->path)
@@ -275,7 +260,7 @@ static int parse_config(const char *var, const char *value, void *data)
 			submodule->path = xstrdup(value);
 			cache_put_path(me->cache, submodule);
 		}
-	} else if (!strcmp(item.buf, "fetchrecursesubmodules")) {
+	} else if (!strcmp(key, "fetchrecursesubmodules")) {
 		/* when parsing worktree configurations we can die early */
 		int die_on_error = is_null_sha1(me->gitmodules_sha1);
 		if (!me->overwrite &&
@@ -286,7 +271,7 @@ static int parse_config(const char *var, const char *value, void *data)
 			submodule->fetch_recurse = parse_fetch_recurse(
 								var, value,
 								die_on_error);
-	} else if (!strcmp(item.buf, "ignore")) {
+	} else if (!strcmp(key, "ignore")) {
 		if (!value)
 			ret = config_error_nonbool(var);
 		else if (!me->overwrite && submodule->ignore)
@@ -302,7 +287,7 @@ static int parse_config(const char *var, const char *value, void *data)
 			free((void *) submodule->ignore);
 			submodule->ignore = xstrdup(value);
 		}
-	} else if (!strcmp(item.buf, "url")) {
+	} else if (!strcmp(key, "url")) {
 		if (!value) {
 			ret = config_error_nonbool(var);
 		} else if (!me->overwrite && submodule->url) {
@@ -312,7 +297,7 @@ static int parse_config(const char *var, const char *value, void *data)
 			free((void *) submodule->url);
 			submodule->url = xstrdup(value);
 		}
-	} else if (!strcmp(item.buf, "update")) {
+	} else if (!strcmp(key, "update")) {
 		if (!value)
 			ret = config_error_nonbool(var);
 		else if (!me->overwrite && submodule->update)
@@ -324,9 +309,6 @@ static int parse_config(const char *var, const char *value, void *data)
 		}
 	}
 
-	strbuf_release(&name);
-	strbuf_release(&item);
-
 	return ret;
 }
 
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (2 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 3/9] submodule-config: remove name_and_item_from_var Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-06  0:48   ` Jonathan Nieder
  2016-02-04 22:09 ` [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config Stefan Beller
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

No need for a strbuf, when all we use it for, is duplicating a string.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index c08ee7f..e375730 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -165,7 +165,6 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 		const unsigned char *gitmodules_sha1, const char *name)
 {
 	struct submodule *submodule;
-	struct strbuf name_buf = STRBUF_INIT;
 
 	submodule = cache_lookup_name(cache, gitmodules_sha1, name);
 	if (submodule)
@@ -173,9 +172,7 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache,
 
 	submodule = xmalloc(sizeof(*submodule));
 
-	strbuf_addstr(&name_buf, name);
-	submodule->name = strbuf_detach(&name_buf, NULL);
-
+	submodule->name = xstrdup(name);
 	submodule->path = NULL;
 	submodule->url = NULL;
 	submodule->update = NULL;
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (3 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-06  1:23   ` Jonathan Nieder
  2016-02-04 22:09 ` [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

This rewrites parse_config to distinguish between configs specific to
one submodule and configs which apply generically to all submodules.
We do not have generic submodule configs yet, but the next patch will
introduce "submodule.fetchJobs".

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule-config.c | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index e375730..2841c5e 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -228,18 +228,22 @@ struct parse_config_parameter {
 	int overwrite;
 };
 
-static int parse_config(const char *var, const char *value, void *data)
+static int parse_generic_submodule_config(const char *key,
+					  const char *var,
+					  const char *value,
+					  struct parse_config_parameter *me)
 {
-	struct parse_config_parameter *me = data;
-	struct submodule *submodule;
-	int subsection_len, ret = 0;
-	const char *subsection, *key;
-
-	if (parse_config_key(var, "submodule", &subsection,
-			     &subsection_len, &key) < 0 || !subsection)
-		return 0;
+	return 0;
+}
 
-	subsection = xmemdupz(subsection, subsection_len);
+static int parse_specific_submodule_config(const char *subsection,
+					   const char *key,
+					   const char *var,
+					   const char *value,
+					   struct parse_config_parameter *me)
+{
+	int ret = 0;
+	struct submodule *submodule;
 	submodule = lookup_or_create_by_name(me->cache,
 					     me->gitmodules_sha1,
 					     subsection);
@@ -309,6 +313,27 @@ static int parse_config(const char *var, const char *value, void *data)
 	return ret;
 }
 
+static int parse_config(const char *var, const char *value, void *data)
+{
+	struct parse_config_parameter *me = data;
+	int subsection_len, ret;
+	const char *subsection, *key;
+	char *sub;
+
+	if (parse_config_key(var, "submodule", &subsection,
+			     &subsection_len, &key) < 0)
+		return 0;
+
+	if (!subsection)
+		return parse_generic_submodule_config(key, var, value, me);
+
+	sub = xmemdupz(subsection, subsection_len);
+	ret = parse_specific_submodule_config(sub, key,
+					      var, value, me);
+	free(sub);
+	return ret;
+}
+
 static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
 				      unsigned char *gitmodules_sha1)
 {
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (4 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-05  3:29   ` Junio C Hamano
  2016-02-04 22:09 ` [PATCHv8 7/9] git submodule update: have a dedicated helper for cloning Stefan Beller
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

This allows to configure fetching and updating in parallel
without having the command line option.

This moved the responsibility to determine how many parallel processes
to start from builtin/fetch to submodule.c as we need a way to communicate
"The user did not specify the number of parallel processes in the command
line options" in the builtin fetch. The submodule code takes care of
the precedence (CLI > config > default)

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/config.txt    |  6 ++++++
 builtin/fetch.c             |  2 +-
 submodule-config.c          | 13 +++++++++++++
 submodule-config.h          |  2 ++
 submodule.c                 |  5 +++++
 t/t5526-fetch-submodules.sh | 14 ++++++++++++++
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2d06b11..3b02732 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2646,6 +2646,12 @@ submodule.<name>.ignore::
 	"--ignore-submodules" option. The 'git submodule' commands are not
 	affected by this setting.
 
+submodule.fetchJobs::
+	Specifies how many submodules are fetched/cloned at the same time.
+	A positive integer allows up to that number of submodules fetched
+	in parallel. A value of 0 will give some reasonable default.
+	If unset, it defaults to 1.
+
 tag.sort::
 	This variable controls the sort ordering of tags when displayed by
 	linkgit:git-tag[1]. Without the "--sort=<value>" option provided, the
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 586840d..5aa1c2d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
 static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
 static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
 static int tags = TAGS_DEFAULT, unshallow, update_shallow;
-static int max_children = 1;
+static int max_children = -1;
 static const char *depth;
 static const char *upload_pack;
 static struct strbuf default_rla = STRBUF_INIT;
diff --git a/submodule-config.c b/submodule-config.c
index 2841c5e..339b59d 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -32,6 +32,7 @@ enum lookup_type {
 
 static struct submodule_cache cache;
 static int is_cache_init;
+static unsigned long parallel_jobs = -1;
 
 static int config_path_cmp(const struct submodule_entry *a,
 			   const struct submodule_entry *b,
@@ -233,6 +234,13 @@ static int parse_generic_submodule_config(const char *key,
 					  const char *value,
 					  struct parse_config_parameter *me)
 {
+	if (!strcmp(key, "fetchjobs")) {
+		if (!git_parse_ulong(value, &parallel_jobs)) {
+			warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
+			parallel_jobs = 1;
+		}
+	}
+
 	return 0;
 }
 
@@ -479,3 +487,8 @@ void submodule_free(void)
 	cache_free(&cache);
 	is_cache_init = 0;
 }
+
+unsigned long config_parallel_submodules(void)
+{
+	return parallel_jobs;
+}
diff --git a/submodule-config.h b/submodule-config.h
index f9e2a29..bab1e8d 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -27,4 +27,6 @@ const struct submodule *submodule_from_path(const unsigned char *commit_sha1,
 		const char *path);
 void submodule_free(void);
 
+unsigned long config_parallel_submodules(void);
+
 #endif /* SUBMODULE_CONFIG_H */
diff --git a/submodule.c b/submodule.c
index b83939c..eb7d54b 100644
--- a/submodule.c
+++ b/submodule.c
@@ -751,6 +751,11 @@ int fetch_populated_submodules(const struct argv_array *options,
 	argv_array_push(&spf.args, "--recurse-submodules-default");
 	/* default value, "--submodule-prefix" and its value are added later */
 
+	if (max_parallel_jobs < 0)
+		max_parallel_jobs = config_parallel_submodules();
+	if (max_parallel_jobs < 0)
+		max_parallel_jobs = 1;
+
 	calculate_changed_submodule_paths();
 	run_processes_parallel(max_parallel_jobs,
 			       get_next_submodule,
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 1241146..954d0e4 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea
 	test_i18ncmp expect.err actual.err
 '
 
+test_expect_success 'fetching submodules respects parallel settings' '
+	git config fetch.recurseSubmodules true &&
+	(
+		cd downstream &&
+		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 7 &&
+		grep "7 tasks" trace.out &&
+		git config submodule.fetchJobs 8 &&
+		GIT_TRACE=$(pwd)/trace.out git fetch &&
+		grep "8 tasks" trace.out &&
+		GIT_TRACE=$(pwd)/trace.out git fetch --jobs 9 &&
+		grep "9 tasks" trace.out
+	)
+'
+
 test_done
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 7/9] git submodule update: have a dedicated helper for cloning
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (5 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 8/9] submodule update: expose parallelism to the user Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 9/9] clone: allow an explicit argument for parallel submodule clones Stefan Beller
  8 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

This introduces a new helper function in git submodule--helper
which takes care of cloning all submodules, which we want to
parallelize eventually.

Some tests (such as empty URL, update_mode=none) are required in the
helper to make the decision for cloning. These checks have been
moved into the C function as well (no need to repeat them in the
shell script).

As we can only access the stderr channel from within the parallel
processing engine, we need to reroute the error message for
specified but initialized submodules to stderr. As it is an error
message, this should have gone to stderr in the first place, so it
is a bug fix along the way.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/submodule--helper.c | 236 ++++++++++++++++++++++++++++++++++++++++++++
 git-submodule.sh            |  45 +++------
 t/t7400-submodule-basic.sh  |   4 +-
 3 files changed, 249 insertions(+), 36 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index f4c3eff..90f9dc6 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -255,6 +255,241 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+static int git_submodule_config(const char *var, const char *value, void *cb)
+{
+	return parse_submodule_config_option(var, value);
+}
+
+struct submodule_update_clone {
+	/* states */
+	int count;
+	int print_unmatched;
+	/* configuration */
+	int quiet;
+	const char *reference;
+	const char *depth;
+	const char *update;
+	const char *recursive_prefix;
+	const char *prefix;
+	struct module_list list;
+	struct string_list projectlines;
+	struct pathspec pathspec;
+};
+#define SUBMODULE_UPDATE_CLONE_INIT {0, 0, 0, NULL, NULL, NULL, NULL, NULL, MODULE_LIST_INIT, STRING_LIST_INIT_DUP}
+
+static void fill_clone_command(struct child_process *cp, int quiet,
+			       const char *prefix, const char *path,
+			       const char *name, const char *url,
+			       const char *reference, const char *depth)
+{
+	cp->git_cmd = 1;
+	cp->no_stdin = 1;
+	cp->stdout_to_stderr = 1;
+	cp->err = -1;
+	argv_array_push(&cp->args, "submodule--helper");
+	argv_array_push(&cp->args, "clone");
+	if (quiet)
+		argv_array_push(&cp->args, "--quiet");
+
+	if (prefix)
+		argv_array_pushl(&cp->args, "--prefix", prefix, NULL);
+
+	argv_array_pushl(&cp->args, "--path", path, NULL);
+	argv_array_pushl(&cp->args, "--name", name, NULL);
+	argv_array_pushl(&cp->args, "--url", url, NULL);
+	if (reference)
+		argv_array_push(&cp->args, reference);
+	if (depth)
+		argv_array_push(&cp->args, depth);
+}
+
+static int update_clone_get_next_task(struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void **pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	for (; pp->count < pp->list.nr; pp->count++) {
+		const struct submodule *sub = NULL;
+		const struct cache_entry *ce = pp->list.entries[pp->count];
+		struct strbuf displaypath_sb = STRBUF_INIT;
+		struct strbuf sb = STRBUF_INIT;
+		const char *displaypath = NULL;
+		const char *update_module = NULL;
+		char *url = NULL;
+		int needs_cloning = 0;
+
+		if (ce_stage(ce)) {
+			if (pp->recursive_prefix)
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s/%s\n",
+					pp->recursive_prefix, ce->name);
+			else
+				strbuf_addf(err,
+					"Skipping unmerged submodule %s\n",
+					ce->name);
+			goto cleanup_and_continue;
+		}
+
+		sub = submodule_from_path(null_sha1, ce->name);
+
+		if (pp->recursive_prefix)
+			displaypath = relative_path(pp->recursive_prefix,
+						    ce->name, &displaypath_sb);
+		else
+			displaypath = ce->name;
+
+		if (pp->update)
+			update_module = pp->update;
+		if (!update_module)
+			update_module = sub->update;
+		if (!update_module)
+			update_module = "checkout";
+		if (!strcmp(update_module, "none")) {
+			strbuf_addf(err, "Skipping submodule '%s'\n",
+				    displaypath);
+			goto cleanup_and_continue;
+		}
+
+		/*
+		 * Looking up the url in .git/config.
+		 * We must not fall back to .gitmodules as we only want
+		 * to process configured submodules.
+		 */
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "submodule.%s.url", sub->name);
+		git_config_get_string(sb.buf, &url);
+		if (!url) {
+			/*
+			 * Only mention uninitialized submodules when its
+			 * path have been specified
+			 */
+			if (pp->pathspec.nr)
+				strbuf_addf(err,
+					    _("Submodule path '%s' not initialized\n"
+					    "Maybe you want to use 'update --init'?"),
+					    displaypath);
+			goto cleanup_and_continue;
+		}
+
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "%s/.git", ce->name);
+		needs_cloning = !file_exists(sb.buf);
+
+		strbuf_reset(&sb);
+		strbuf_addf(&sb, "%06o %s %d %d\t%s\n", ce->ce_mode,
+				sha1_to_hex(ce->sha1), ce_stage(ce),
+				needs_cloning, ce->name);
+		string_list_append(&pp->projectlines, sb.buf);
+
+		if (needs_cloning) {
+			fill_clone_command(cp, pp->quiet, pp->prefix,
+					   ce->name, sub->name, strdup(url),
+					   pp->reference, pp->depth);
+			pp->count++;
+		}
+
+cleanup_and_continue:
+		free(url);
+		strbuf_reset(&displaypath_sb);
+		strbuf_reset(&sb);
+
+		if (needs_cloning)
+			return 1;
+	}
+	return 0;
+}
+
+static int update_clone_start_failure(struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void *pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	strbuf_addf(err, "error when starting a child process");
+	pp->print_unmatched = 1;
+
+	return 1;
+}
+
+static int update_clone_task_finished(int result,
+				      struct child_process *cp,
+				      struct strbuf *err,
+				      void *pp_cb,
+				      void *pp_task_cb)
+{
+	struct submodule_update_clone *pp = pp_cb;
+
+	if (!result) {
+		return 0;
+	} else {
+		strbuf_addf(err, "error in one child process");
+		pp->print_unmatched = 1;
+		return 1;
+	}
+}
+
+static int update_clone(int argc, const char **argv, const char *prefix)
+{
+	struct string_list_item *item;
+	struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
+
+	struct option module_list_options[] = {
+		OPT_STRING(0, "prefix", &prefix,
+			   N_("path"),
+			   N_("path into the working tree")),
+		OPT_STRING(0, "recursive_prefix", &pp.recursive_prefix,
+			   N_("path"),
+			   N_("path into the working tree, across nested "
+			      "submodule boundaries")),
+		OPT_STRING(0, "update", &pp.update,
+			   N_("string"),
+			   N_("update command for submodules")),
+		OPT_STRING(0, "reference", &pp.reference, "<repository>",
+			   N_("Use the local reference repository "
+			      "instead of a full clone")),
+		OPT_STRING(0, "depth", &pp.depth, "<depth>",
+			   N_("Create a shallow clone truncated to the "
+			      "specified number of revisions")),
+		OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
+		OPT_END()
+	};
+
+	const char *const git_submodule_helper_usage[] = {
+		N_("git submodule--helper list [--prefix=<path>] [<path>...]"),
+		NULL
+	};
+	pp.prefix = prefix;
+
+	argc = parse_options(argc, argv, prefix, module_list_options,
+			     git_submodule_helper_usage, 0);
+
+	if (module_list_compute(argc, argv, prefix, &pp.pathspec, &pp.list) < 0) {
+		printf("#unmatched\n");
+		return 1;
+	}
+
+	gitmodules_config();
+	/* Overlay the parsed .gitmodules file with .git/config */
+	git_config(git_submodule_config, NULL);
+	run_processes_parallel(1, update_clone_get_next_task,
+				  update_clone_start_failure,
+				  update_clone_task_finished,
+				  &pp);
+
+	if (pp.print_unmatched) {
+		printf("#unmatched\n");
+		return 1;
+	}
+
+	for_each_string_list_item(item, &pp.projectlines)
+		utf8_fprintf(stdout, "%s", item->string);
+
+	return 0;
+}
+
 struct cmd_struct {
 	const char *cmd;
 	int (*fn)(int, const char **, const char *);
@@ -264,6 +499,7 @@ static struct cmd_struct commands[] = {
 	{"list", module_list},
 	{"name", module_name},
 	{"clone", module_clone},
+	{"update-clone", update_clone}
 };
 
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
diff --git a/git-submodule.sh b/git-submodule.sh
index 9bc5c5f..9f554fb 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -664,17 +664,18 @@ cmd_update()
 		cmd_init "--" "$@" || return
 	fi
 
-	cloned_modules=
-	git submodule--helper list --prefix "$wt_prefix" "$@" | {
+	git submodule--helper update-clone ${GIT_QUIET:+--quiet} \
+		${wt_prefix:+--prefix "$wt_prefix"} \
+		${prefix:+--recursive_prefix "$prefix"} \
+		${update:+--update "$update"} \
+		${reference:+--reference "$reference"} \
+		${depth:+--depth "$depth"} \
+		"$@" | {
 	err=
-	while read mode sha1 stage sm_path
+	while read mode sha1 stage just_cloned sm_path
 	do
 		die_if_unmatched "$mode"
-		if test "$stage" = U
-		then
-			echo >&2 "Skipping unmerged submodule $prefix$sm_path"
-			continue
-		fi
+
 		name=$(git submodule--helper name "$sm_path") || exit
 		url=$(git config submodule."$name".url)
 		branch=$(get_submodule_config "$name" branch master)
@@ -691,27 +692,10 @@ cmd_update()
 
 		displaypath=$(relative_path "$prefix$sm_path")
 
-		if test "$update_module" = "none"
-		then
-			echo "Skipping submodule '$displaypath'"
-			continue
-		fi
-
-		if test -z "$url"
-		then
-			# Only mention uninitialized submodules when its
-			# path have been specified
-			test "$#" != "0" &&
-			say "$(eval_gettext "Submodule path '\$displaypath' not initialized
-Maybe you want to use 'update --init'?")"
-			continue
-		fi
-
-		if ! test -d "$sm_path"/.git && ! test -f "$sm_path"/.git
+		if test $just_cloned -eq 1
 		then
-			git submodule--helper clone ${GIT_QUIET:+--quiet} --prefix "$prefix" --path "$sm_path" --name "$name" --url "$url" "$reference" "$depth" || exit
-			cloned_modules="$cloned_modules;$name"
 			subsha1=
+			update_module=checkout
 		else
 			subsha1=$(clear_local_git_env; cd "$sm_path" &&
 				git rev-parse --verify HEAD) ||
@@ -751,13 +735,6 @@ Maybe you want to use 'update --init'?")"
 				die "$(eval_gettext "Unable to fetch in submodule path '\$displaypath'")"
 			fi
 
-			# Is this something we just cloned?
-			case ";$cloned_modules;" in
-			*";$name;"*)
-				# then there is no local change to integrate
-				update_module=checkout ;;
-			esac
-
 			must_die_on_failure=
 			case "$update_module" in
 			checkout)
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 540771c..5991e3c 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -462,7 +462,7 @@ test_expect_success 'update --init' '
 	git config --remove-section submodule.example &&
 	test_must_fail git config submodule.example.url &&
 
-	git submodule update init > update.out &&
+	git submodule update init 2> update.out &&
 	cat update.out &&
 	test_i18ngrep "not initialized" update.out &&
 	test_must_fail git rev-parse --resolve-git-dir init/.git &&
@@ -480,7 +480,7 @@ test_expect_success 'update --init from subdirectory' '
 	mkdir -p sub &&
 	(
 		cd sub &&
-		git submodule update ../init >update.out &&
+		git submodule update ../init 2>update.out &&
 		cat update.out &&
 		test_i18ngrep "not initialized" update.out &&
 		test_must_fail git rev-parse --resolve-git-dir ../init/.git &&
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 8/9] submodule update: expose parallelism to the user
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (6 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 7/9] git submodule update: have a dedicated helper for cloning Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  2016-02-04 22:09 ` [PATCHv8 9/9] clone: allow an explicit argument for parallel submodule clones Stefan Beller
  8 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

Expose possible parallelism either via the "--jobs" CLI parameter or
the "submodule.fetchJobs" setting.

By having the variable initialized to -1, we make sure 0 can be passed
into the parallel processing machine, which will then pick as many parallel
workers as there are CPUs.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/git-submodule.txt |  7 ++++++-
 builtin/submodule--helper.c     | 18 ++++++++++++++----
 git-submodule.sh                |  9 +++++++++
 t/t7406-submodule-update.sh     | 12 ++++++++++++
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-submodule.txt b/Documentation/git-submodule.txt
index 1572f05..13adebf 100644
--- a/Documentation/git-submodule.txt
+++ b/Documentation/git-submodule.txt
@@ -16,7 +16,7 @@ SYNOPSIS
 'git submodule' [--quiet] deinit [-f|--force] [--] <path>...
 'git submodule' [--quiet] update [--init] [--remote] [-N|--no-fetch]
 	      [-f|--force] [--rebase|--merge] [--reference <repository>]
-	      [--depth <depth>] [--recursive] [--] [<path>...]
+	      [--depth <depth>] [--recursive] [--jobs <n>] [--] [<path>...]
 'git submodule' [--quiet] summary [--cached|--files] [(-n|--summary-limit) <n>]
 	      [commit] [--] [<path>...]
 'git submodule' [--quiet] foreach [--recursive] <command>
@@ -377,6 +377,11 @@ for linkgit:git-clone[1]'s `--reference` and `--shared` options carefully.
 	clone with a history truncated to the specified number of revisions.
 	See linkgit:git-clone[1]
 
+-j <n>::
+--jobs <n>::
+	This option is only valid for the update command.
+	Clone new submodules in parallel with as many jobs.
+	Defaults to the `submodule.fetchJobs` option.
 
 <path>...::
 	Paths to submodule(s). When specified this will restrict the command
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 90f9dc6..7e01b85 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -433,6 +433,7 @@ static int update_clone_task_finished(int result,
 
 static int update_clone(int argc, const char **argv, const char *prefix)
 {
+	int max_jobs = -1;
 	struct string_list_item *item;
 	struct submodule_update_clone pp = SUBMODULE_UPDATE_CLONE_INIT;
 
@@ -453,6 +454,8 @@ static int update_clone(int argc, const char **argv, const char *prefix)
 		OPT_STRING(0, "depth", &pp.depth, "<depth>",
 			   N_("Create a shallow clone truncated to the "
 			      "specified number of revisions")),
+		OPT_INTEGER('j', "jobs", &max_jobs,
+			    N_("parallel jobs")),
 		OPT__QUIET(&pp.quiet, N_("do't print cloning progress")),
 		OPT_END()
 	};
@@ -474,10 +477,17 @@ static int update_clone(int argc, const char **argv, const char *prefix)
 	gitmodules_config();
 	/* Overlay the parsed .gitmodules file with .git/config */
 	git_config(git_submodule_config, NULL);
-	run_processes_parallel(1, update_clone_get_next_task,
-				  update_clone_start_failure,
-				  update_clone_task_finished,
-				  &pp);
+
+	if (max_jobs < 0)
+		max_jobs = config_parallel_submodules();
+	if (max_jobs < 0)
+		max_jobs = 1;
+
+	run_processes_parallel(max_jobs,
+			       update_clone_get_next_task,
+			       update_clone_start_failure,
+			       update_clone_task_finished,
+			       &pp);
 
 	if (pp.print_unmatched) {
 		printf("#unmatched\n");
diff --git a/git-submodule.sh b/git-submodule.sh
index 9f554fb..10c5af9 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -645,6 +645,14 @@ cmd_update()
 		--depth=*)
 			depth=$1
 			;;
+		-j|--jobs)
+			case "$2" in '') usage ;; esac
+			jobs="--jobs=$2"
+			shift
+			;;
+		--jobs=*)
+			jobs=$1
+			;;
 		--)
 			shift
 			break
@@ -670,6 +678,7 @@ cmd_update()
 		${update:+--update "$update"} \
 		${reference:+--reference "$reference"} \
 		${depth:+--depth "$depth"} \
+		${jobs:+$jobs} \
 		"$@" | {
 	err=
 	while read mode sha1 stage just_cloned sm_path
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index dda3929..7fd5142 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -774,4 +774,16 @@ test_expect_success 'submodule update --recursive drops module name before recur
 	 test_i18ngrep "Submodule path .deeper/submodule/subsubmodule.: checked out" actual
 	)
 '
+
+test_expect_success 'submodule update can be run in parallel' '
+	(cd super2 &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 7 &&
+	 grep "7 tasks" trace.out &&
+	 git config submodule.fetchJobs 8 &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update &&
+	 grep "8 tasks" trace.out &&
+	 GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 9 &&
+	 grep "9 tasks" trace.out
+	)
+'
 test_done
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv8 9/9] clone: allow an explicit argument for parallel submodule clones
  2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
                   ` (7 preceding siblings ...)
  2016-02-04 22:09 ` [PATCHv8 8/9] submodule update: expose parallelism to the user Stefan Beller
@ 2016-02-04 22:09 ` Stefan Beller
  8 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-04 22:09 UTC (permalink / raw)
  To: git, gitster; +Cc: jrnieder, Jens.Lehmann, Stefan Beller

Just pass it along to "git submodule update", which may pick reasonable
defaults if you don't specify an explicit number.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/git-clone.txt |  6 +++++-
 builtin/clone.c             | 19 +++++++++++++------
 t/t7406-submodule-update.sh | 15 +++++++++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt
index 6bf000d..6db7b6d 100644
--- a/Documentation/git-clone.txt
+++ b/Documentation/git-clone.txt
@@ -14,7 +14,7 @@ SYNOPSIS
 	  [-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
 	  [--dissociate] [--separate-git-dir <git dir>]
 	  [--depth <depth>] [--[no-]single-branch]
-	  [--recursive | --recurse-submodules] [--] <repository>
+	  [--recursive | --recurse-submodules] [--jobs <n>] [--] <repository>
 	  [<directory>]
 
 DESCRIPTION
@@ -221,6 +221,10 @@ objects from the source repository into a pack in the cloned repository.
 	The result is Git repository can be separated from working
 	tree.
 
+-j <n>::
+--jobs <n>::
+	The number of submodules fetched at the same time.
+	Defaults to the `submodule.fetchJobs` option.
 
 <repository>::
 	The (possibly remote) repository to clone from.  See the
diff --git a/builtin/clone.c b/builtin/clone.c
index a0b3cd9..b004fb4 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -50,6 +50,7 @@ static int option_progress = -1;
 static struct string_list option_config;
 static struct string_list option_reference;
 static int option_dissociate;
+static int max_jobs = -1;
 
 static struct option builtin_clone_options[] = {
 	OPT__VERBOSITY(&option_verbosity),
@@ -72,6 +73,8 @@ static struct option builtin_clone_options[] = {
 		    N_("initialize submodules in the clone")),
 	OPT_BOOL(0, "recurse-submodules", &option_recursive,
 		    N_("initialize submodules in the clone")),
+	OPT_INTEGER('j', "jobs", &max_jobs,
+		    N_("number of submodules cloned in parallel")),
 	OPT_STRING(0, "template", &option_template, N_("template-directory"),
 		   N_("directory from which templates will be used")),
 	OPT_STRING_LIST(0, "reference", &option_reference, N_("repo"),
@@ -95,10 +98,6 @@ static struct option builtin_clone_options[] = {
 	OPT_END()
 };
 
-static const char *argv_submodule[] = {
-	"submodule", "update", "--init", "--recursive", NULL
-};
-
 static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
 {
 	static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
@@ -724,8 +723,16 @@ static int checkout(void)
 	err |= run_hook_le(NULL, "post-checkout", sha1_to_hex(null_sha1),
 			   sha1_to_hex(sha1), "1", NULL);
 
-	if (!err && option_recursive)
-		err = run_command_v_opt(argv_submodule, RUN_GIT_CMD);
+	if (!err && option_recursive) {
+		struct argv_array args = ARGV_ARRAY_INIT;
+		argv_array_pushl(&args, "submodule", "update", "--init", "--recursive", NULL);
+
+		if (max_jobs != -1)
+			argv_array_pushf(&args, "--jobs=%d", max_jobs);
+
+		err = run_command_v_opt(args.argv, RUN_GIT_CMD);
+		argv_array_clear(&args);
+	}
 
 	return err;
 }
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index 7fd5142..090891e 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -786,4 +786,19 @@ test_expect_success 'submodule update can be run in parallel' '
 	 grep "9 tasks" trace.out
 	)
 '
+
+test_expect_success 'git clone passes the parallel jobs config on to submodules' '
+	test_when_finished "rm -rf super4" &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 7 . super4 &&
+	grep "7 tasks" trace.out &&
+	rm -rf super4 &&
+	git config --global submodule.fetchJobs 8 &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules . super4 &&
+	grep "8 tasks" trace.out &&
+	rm -rf super4 &&
+	GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 9 . super4 &&
+	grep "9 tasks" trace.out &&
+	rm -rf super4
+'
+
 test_done
-- 
2.7.0.rc0.41.gd102984.dirty

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 1/9] submodule-config: keep update strategy around
  2016-02-04 22:09 ` [PATCHv8 1/9] submodule-config: keep update strategy around Stefan Beller
@ 2016-02-05  0:59   ` Jonathan Nieder
  2016-02-05 20:25     ` Stefan Beller
  0 siblings, 1 reply; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-05  0:59 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, gitster, Jens.Lehmann

Hi,

It's been a while since I looked at this series.  Hopefully I can
come at it with some fresh eyes.  Thanks for your perseverance.

Stefan Beller wrote:

> We need the submodule update strategies in a later patch.

This description doesn't explain what the patch will do or why
parse_config didn't already retain the value.  If I look back
at this patch later and want to understand why it was written,
what would I want to know?

It could say

	Currently submodule.<name>.update is only handled by git-submodule.sh.
	C code will start to need to make use of that value as more of the
	functionality of git-submodule.sh moves into library code in C.

	Add the update field to 'struct submodule' and populate it so it can
	be read as sm->update.

[...]
> +++ b/submodule-config.c
> @@ -311,6 +312,16 @@ static int parse_config(const char *var, const char *value, void *data)
>  			free((void *) submodule->url);
>  			submodule->url = xstrdup(value);
>  		}
> +	} else if (!strcmp(item.buf, "update")) {
> +		if (!value)
> +			ret = config_error_nonbool(var);
> +		else if (!me->overwrite && submodule->update != NULL)
> +			warn_multiple_config(me->commit_sha1, submodule->name,
> +					     "update");
> +		else {
> +			free((void *) submodule->update);
> +			submodule->update = xstrdup(value);
> +		}

(not about this patch) This code is repetitive.  Would there be a way
to share code between the parsing of different per-submodule settings?

[...]
> --- a/submodule-config.h
> +++ b/submodule-config.h
> @@ -14,6 +14,7 @@ struct submodule {
>  	const char *url;
>  	int fetch_recurse;
>  	const char *ignore;
> +	const char *update;

gitmodules(5) tells me the only allowed values are checkout, rebase,
merge, and none.  I wouldn't know at a glance how to match against
those in calling code.  Can this be an enum?

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 2/9] submodule-config: drop check against NULL
  2016-02-04 22:09 ` [PATCHv8 2/9] submodule-config: drop check against NULL Stefan Beller
@ 2016-02-05  1:05   ` Jonathan Nieder
  0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-05  1:05 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, gitster, Jens.Lehmann

Stefan Beller wrote:

> Adhere to the common coding style of Git and not check explicitly
> for NULL throughout the file. There are still other occurrences in the
> code base but that is usually inside of conditions with side effects.
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  submodule-config.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)

Thanks.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-04 22:09 ` [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
@ 2016-02-05  3:29   ` Junio C Hamano
  2016-02-05 18:50     ` Stefan Beller
  0 siblings, 1 reply; 23+ messages in thread
From: Junio C Hamano @ 2016-02-05  3:29 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, jrnieder, Jens.Lehmann

Stefan Beller <sbeller@google.com> writes:

> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 586840d..5aa1c2d 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
>  static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
>  static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
>  static int tags = TAGS_DEFAULT, unshallow, update_shallow;
> -static int max_children = 1;
> +static int max_children = -1;

This makes sense as you would later be doing "If left unspecified,
i.e. -1, then fall back to blah" ...

g> diff --git a/submodule-config.c b/submodule-config.c
> index 2841c5e..339b59d 100644
> --- a/submodule-config.c
> +++ b/submodule-config.c
> @@ -32,6 +32,7 @@ enum lookup_type {
>  
>  static struct submodule_cache cache;
>  static int is_cache_init;
> +static unsigned long parallel_jobs = -1;

... but I do not think this does.  For one thing, you would not be
doing "If parallel_jobs < -1, then that is unspecified yet" for the
unsigned long variable, and for another, I do not think you would be
behaving differently for the first time (i.e. "unspecified yet" case).

I think you want to initialize this to 1, if your "not configured at
all" default is supposed to be 1.

> @@ -233,6 +234,13 @@ static int parse_generic_submodule_config(const char *key,
>  					  const char *value,
>  					  struct parse_config_parameter *me)
>  {
> +	if (!strcmp(key, "fetchjobs")) {
> +		if (!git_parse_ulong(value, &parallel_jobs)) {
> +			warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
> +			parallel_jobs = 1;

Hmph, this is not a die() because...?  Not a rhetorical question.

> +unsigned long config_parallel_submodules(void)
> +{
> +	return parallel_jobs;
> +}

It is not a crime to make this return "int", as the code that
eventually uses it will assign it to a variable that will be
passed to run_processes_parallel() that takes an "int".

In fact, returning "int" would be preferrable.  You are causing
truncation somewhere in the callchain anyway.  If the truncation
bothers you, this function or immediately after calling
git_parse_ulong() would be a good place to do a range check.  The
variable parallel_jobs has to stay "unsigned long" in any case as
you are calling git_parse_ulong().

> diff --git a/submodule.c b/submodule.c
> index b83939c..eb7d54b 100644
> --- a/submodule.c
> +++ b/submodule.c
> @@ -751,6 +751,11 @@ int fetch_populated_submodules(const struct argv_array *options,
>  	argv_array_push(&spf.args, "--recurse-submodules-default");
>  	/* default value, "--submodule-prefix" and its value are added later */
>  
> +	if (max_parallel_jobs < 0)
> +		max_parallel_jobs = config_parallel_submodules();
> +	if (max_parallel_jobs < 0)
> +		max_parallel_jobs = 1;
> +
>  	calculate_changed_submodule_paths();
>  	run_processes_parallel(max_parallel_jobs,
>  			       get_next_submodule,

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-05  3:29   ` Junio C Hamano
@ 2016-02-05 18:50     ` Stefan Beller
  2016-02-05 19:59       ` Junio C Hamano
  0 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-05 18:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jonathan Nieder, Jens Lehmann

On Thu, Feb 4, 2016 at 7:29 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> diff --git a/builtin/fetch.c b/builtin/fetch.c
>> index 586840d..5aa1c2d 100644
>> --- a/builtin/fetch.c
>> +++ b/builtin/fetch.c
>> @@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
>>  static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
>>  static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
>>  static int tags = TAGS_DEFAULT, unshallow, update_shallow;
>> -static int max_children = 1;
>> +static int max_children = -1;
>
> This makes sense as you would later be doing "If left unspecified,
> i.e. -1, then fall back to blah" ...

Right,
max_children is passed into fetch_populated_submodules, which
should handle that....

>
> g> diff --git a/submodule-config.c b/submodule-config.c
>> index 2841c5e..339b59d 100644
>> --- a/submodule-config.c
>> +++ b/submodule-config.c
>> @@ -32,6 +32,7 @@ enum lookup_type {
>>
>>  static struct submodule_cache cache;
>>  static int is_cache_init;
>> +static unsigned long parallel_jobs = -1;
>
> ... but I do not think this does.  For one thing, you would not be
> doing "If parallel_jobs < -1, then that is unspecified yet" for the
> unsigned long variable, and for another, I do not think you would be
> behaving differently for the first time (i.e. "unspecified yet" case).
>
> I think you want to initialize this to 1, if your "not configured at
> all" default is supposed to be 1.

Please read on in the patch, such that you find

@@ -751,6 +751,11 @@ int fetch_populated_submodules(
(This is where the max_children from builtin/fetch is passed into,
named max_parallel_jobs now)

+       if (max_parallel_jobs < 0)
+               max_parallel_jobs = config_parallel_submodules();
+       if (max_parallel_jobs < 0)
+               max_parallel_jobs = 1;
+

So if we don't get the config option from builtin/fetch, we ask for
config_parallel_submodules(), which is what you were seeing above
initialized as -1, too.  And if that is still -1, we default to 1 there.

So from a design perspective, you're rather saying we want to move part of
this logic into submodule-config.c, such that
config_parallel_submodules never returns -1,
but either 1 as the default or the actual configuration?
Then the code in fetch_populated_submodules, would be as simple as

    if (max_parallel_jobs < 0)
        max_parallel_jobs = config_parallel_submodules();
    // done , as config_parallel_submodules(); gives the last
authoritive answer.

Well looking at submodule-config.c this might be easier, too.

Ok.

>
>> @@ -233,6 +234,13 @@ static int parse_generic_submodule_config(const char *key,
>>                                         const char *value,
>>                                         struct parse_config_parameter *me)
>>  {
>> +     if (!strcmp(key, "fetchjobs")) {
>> +             if (!git_parse_ulong(value, &parallel_jobs)) {
>> +                     warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
>> +                     parallel_jobs = 1;
>
> Hmph, this is not a die() because...?  Not a rhetorical question.

Because this config option doesn't alter the state of the repository.
After the fact you should not be able to tell how much parallel operations
were going on.

(As opposed to other options which alter the state of the repository)

I'll change it to die(...), though it sounds overly strict to me in this case.
But I guess consistency beats overstrictness here.

>
>> +unsigned long config_parallel_submodules(void)
>> +{
>> +     return parallel_jobs;
>> +}
>
> It is not a crime to make this return "int", as the code that
> eventually uses it will assign it to a variable that will be
> passed to run_processes_parallel() that takes an "int".

ok

>
> In fact, returning "int" would be preferrable.  You are causing
> truncation somewhere in the callchain anyway.  If the truncation
> bothers you, this function or immediately after calling
> git_parse_ulong() would be a good place to do a range check.  The
> variable parallel_jobs has to stay "unsigned long" in any case as
> you are calling git_parse_ulong().

ok, that should be the best place.

>
>> diff --git a/submodule.c b/submodule.c
>> index b83939c..eb7d54b 100644
>> --- a/submodule.c
>> +++ b/submodule.c
>> @@ -751,6 +751,11 @@ int fetch_populated_submodules(const struct argv_array *options,
>>       argv_array_push(&spf.args, "--recurse-submodules-default");
>>       /* default value, "--submodule-prefix" and its value are added later */
>>
>> +     if (max_parallel_jobs < 0)
>> +             max_parallel_jobs = config_parallel_submodules();
>> +     if (max_parallel_jobs < 0)
>> +             max_parallel_jobs = 1;
>> +
>>       calculate_changed_submodule_paths();
>>       run_processes_parallel(max_parallel_jobs,
>>                              get_next_submodule,

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option
  2016-02-05 18:50     ` Stefan Beller
@ 2016-02-05 19:59       ` Junio C Hamano
  0 siblings, 0 replies; 23+ messages in thread
From: Junio C Hamano @ 2016-02-05 19:59 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, Jonathan Nieder, Jens Lehmann

Stefan Beller <sbeller@google.com> writes:

> On Thu, Feb 4, 2016 at 7:29 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Stefan Beller <sbeller@google.com> writes:
>> ...
>>> +static unsigned long parallel_jobs = -1;
>>
>> ... but I do not think this does....
>
> So if we don't get the config option from builtin/fetch, we ask for
> config_parallel_submodules(), which is what you were seeing above
> initialized as -1, too.  And if that is still -1, we default to 1 there.

But that is not really -1, but -1 casted to unsigned long that is
casted back to int.  

> So from a design perspective, you're rather saying we want to move part of
> this logic into submodule-config.c, such that
> config_parallel_submodules never returns -1,
> but either 1 as the default or the actual configuration?

That is not my design but yours ;-)

Expecting config_parallel_submodules() to return -1 when there is no
variable defined contradicts what you wrote in the documentation, I
think, where it says "this variable defaults to 1 when not set".

> Then the code in fetch_populated_submodules, would be as simple as
>
>     if (max_parallel_jobs < 0)
>         max_parallel_jobs = config_parallel_submodules();

Yup.

>>> @@ -233,6 +234,13 @@ static int parse_generic_submodule_config(const char *key,
>>>                                         const char *value,
>>>                                         struct parse_config_parameter *me)
>>>  {
>>> +     if (!strcmp(key, "fetchjobs")) {
>>> +             if (!git_parse_ulong(value, &parallel_jobs)) {
>>> +                     warning(_("Error parsing submodule.fetchJobs; Defaulting to 1."));
>>> +                     parallel_jobs = 1;
>>
>> Hmph, this is not a die() because...?  Not a rhetorical question.
>
> Because this config option doesn't alter the state of the repository.
> After the fact you should not be able to tell how much parallel operations
> were going on.
>
> (As opposed to other options which alter the state of the repository)
>
> I'll change it to die(...), though it sounds overly strict to me in this case.
> But I guess consistency beats overstrictness here.

I do not see it as being overly strict, though.

If I were a user of this feature, I'd rather see a problem pointed
out to me (e.g "you spelled 'true' but that fetchJobs expects a
positive integer") to help me fix it, rather than having to see this
warning every time I try to run submodule commands.

What benefit does a user get by being loose here?  Probably I am
not considering some valid use cases...

>>> +unsigned long config_parallel_submodules(void)
>>> +{
>>> +     return parallel_jobs;
>>> +}
>>
>> It is not a crime to make this return "int", as the code that
>> eventually uses it will assign it to a variable that will be
>> passed to run_processes_parallel() that takes an "int".
>
> ok
>
>>
>> In fact, returning "int" would be preferrable.  You are causing
>> truncation somewhere in the callchain anyway.  If the truncation
>> bothers you, this function or immediately after calling
>> git_parse_ulong() would be a good place to do a range check.  The
>> variable parallel_jobs has to stay "unsigned long" in any case as
>> you are calling git_parse_ulong().
>
> ok, that should be the best place.

Alternatively (I haven't weighed pros and cons myself), you can make
parallel_jobs an "int", call git_parse_ulong() using a temporary
"unsigned long" and after doing range check assign it to
parallel_jobs.  That would make this function really trivial ;-)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 1/9] submodule-config: keep update strategy around
  2016-02-05  0:59   ` Jonathan Nieder
@ 2016-02-05 20:25     ` Stefan Beller
  2016-02-05 20:33       ` Jonathan Nieder
  0 siblings, 1 reply; 23+ messages in thread
From: Stefan Beller @ 2016-02-05 20:25 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Junio C Hamano, Jens Lehmann

On Thu, Feb 4, 2016 at 4:59 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> Hi,
>
> It's been a while since I looked at this series.  Hopefully I can
> come at it with some fresh eyes.  Thanks for your perseverance.
>
> Stefan Beller wrote:
>
>> We need the submodule update strategies in a later patch.
>
> This description doesn't explain what the patch will do or why
> parse_config didn't already retain the value.  If I look back
> at this patch later and want to understand why it was written,
> what would I want to know?
>
> It could say
>
>         Currently submodule.<name>.update is only handled by git-submodule.sh.
>         C code will start to need to make use of that value as more of the
>         functionality of git-submodule.sh moves into library code in C.
>
>         Add the update field to 'struct submodule' and populate it so it can
>         be read as sm->update.


ok

>
> [...]
>> +++ b/submodule-config.c
>> @@ -311,6 +312,16 @@ static int parse_config(const char *var, const char *value, void *data)
>>                       free((void *) submodule->url);
>>                       submodule->url = xstrdup(value);
>>               }
>> +     } else if (!strcmp(item.buf, "update")) {
>> +             if (!value)
>> +                     ret = config_error_nonbool(var);
>> +             else if (!me->overwrite && submodule->update != NULL)
>> +                     warn_multiple_config(me->commit_sha1, submodule->name,
>> +                                          "update");
>> +             else {
>> +                     free((void *) submodule->update);
>> +                     submodule->update = xstrdup(value);
>> +             }
>
> (not about this patch) This code is repetitive.  Would there be a way
> to share code between the parsing of different per-submodule settings?
>
> [...]
>> --- a/submodule-config.h
>> +++ b/submodule-config.h
>> @@ -14,6 +14,7 @@ struct submodule {
>>       const char *url;
>>       int fetch_recurse;
>>       const char *ignore;
>> +     const char *update;
>
> gitmodules(5) tells me the only allowed values are checkout, rebase,
> merge, and none.  I wouldn't know at a glance how to match against
> those in calling code.  Can this be an enum?

"Note that the !command form is intentionally ignored here for
security reasons."

However you can overwrite the update field in .git/config to be "! [foo]",
which we also need to support, so let's keep it a string for now?

>
> Thanks,
> Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 1/9] submodule-config: keep update strategy around
  2016-02-05 20:25     ` Stefan Beller
@ 2016-02-05 20:33       ` Jonathan Nieder
  2016-02-05 20:36         ` Stefan Beller
  0 siblings, 1 reply; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-05 20:33 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, Junio C Hamano, Jens Lehmann

Stefan Beller wrote:
> On Thu, Feb 4, 2016 at 4:59 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
>> Stefan Beller wrote:

>>> +++ b/submodule-config.h
>>> @@ -14,6 +14,7 @@ struct submodule {
>>> +     const char *update;
>>
>> gitmodules(5) tells me the only allowed values are checkout, rebase,
>> merge, and none.  I wouldn't know at a glance how to match against
>> those in calling code.  Can this be an enum?
>
> "Note that the !command form is intentionally ignored here for
> security reasons."
>
> However you can overwrite the update field in .git/config to be "! [foo]",
> which we also need to support, so let's keep it a string for now?

I had forgotten about "!command".  I think making it a string makes
the field hard to use: how do I determine whether it was checkout,
rebase, merge, custom, or none?  Are they case-sensitive?  Am I
supposed to strip whitespace?

One possibility would be to have an enum and a string:

	enum submodule_update_type update;
	const char *update_command;

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 1/9] submodule-config: keep update strategy around
  2016-02-05 20:33       ` Jonathan Nieder
@ 2016-02-05 20:36         ` Stefan Beller
  0 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-05 20:36 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Junio C Hamano, Jens Lehmann

On Fri, Feb 5, 2016 at 12:33 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> Stefan Beller wrote:
>> On Thu, Feb 4, 2016 at 4:59 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
>>> Stefan Beller wrote:
>
>>>> +++ b/submodule-config.h
>>>> @@ -14,6 +14,7 @@ struct submodule {
>>>> +     const char *update;
>>>
>>> gitmodules(5) tells me the only allowed values are checkout, rebase,
>>> merge, and none.  I wouldn't know at a glance how to match against
>>> those in calling code.  Can this be an enum?
>>
>> "Note that the !command form is intentionally ignored here for
>> security reasons."
>>
>> However you can overwrite the update field in .git/config to be "! [foo]",
>> which we also need to support, so let's keep it a string for now?
>
> I had forgotten about "!command".  I think making it a string makes
> the field hard to use: how do I determine whether it was checkout,
> rebase, merge, custom, or none?  Are they case-sensitive?  Am I
> supposed to strip whitespace?
>
> One possibility would be to have an enum and a string:
>
>         enum submodule_update_type update;
>         const char *update_command;

ok, that makes sense.

>
> Thanks,
> Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 3/9] submodule-config: remove name_and_item_from_var
  2016-02-04 22:09 ` [PATCHv8 3/9] submodule-config: remove name_and_item_from_var Stefan Beller
@ 2016-02-06  0:46   ` Jonathan Nieder
  2016-02-06  1:21     ` Stefan Beller
  0 siblings, 1 reply; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-06  0:46 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, gitster, Jens.Lehmann

Hi,

Stefan Beller wrote:

> --- a/submodule-config.c
> +++ b/submodule-config.c
> @@ -251,18 +235,19 @@ static int parse_config(const char *var, const char *value, void *data)
>  {
>  	struct parse_config_parameter *me = data;
>  	struct submodule *submodule;
> -	struct strbuf name = STRBUF_INIT, item = STRBUF_INIT;
> -	int ret = 0;
> +	int subsection_len, ret = 0;
> +	const char *subsection, *key;
>  
> -	/* this also ensures that we only parse submodule entries */
> -	if (!name_and_item_from_var(var, &name, &item))
> +	if (parse_config_key(var, "submodule", &subsection,
> +			     &subsection_len, &key) < 0 || !subsection)
>  		return 0;
>  
> +	subsection = xmemdupz(subsection, subsection_len);

This appears to be leaked.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name
  2016-02-04 22:09 ` [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name Stefan Beller
@ 2016-02-06  0:48   ` Jonathan Nieder
  0 siblings, 0 replies; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-06  0:48 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, gitster, Jens.Lehmann

Stefan Beller wrote:

> No need for a strbuf, when all we use it for, is duplicating a string.
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  submodule-config.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)

Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 3/9] submodule-config: remove name_and_item_from_var
  2016-02-06  0:46   ` Jonathan Nieder
@ 2016-02-06  1:21     ` Stefan Beller
  0 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-06  1:21 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Junio C Hamano, Jens Lehmann

On Fri, Feb 5, 2016 at 4:46 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> Hi,
>
> Stefan Beller wrote:
>
>> --- a/submodule-config.c
>> +++ b/submodule-config.c
>> @@ -251,18 +235,19 @@ static int parse_config(const char *var, const char *value, void *data)
>>  {
>>       struct parse_config_parameter *me = data;
>>       struct submodule *submodule;
>> -     struct strbuf name = STRBUF_INIT, item = STRBUF_INIT;
>> -     int ret = 0;
>> +     int subsection_len, ret = 0;
>> +     const char *subsection, *key;
>>
>> -     /* this also ensures that we only parse submodule entries */
>> -     if (!name_and_item_from_var(var, &name, &item))
>> +     if (parse_config_key(var, "submodule", &subsection,
>> +                          &subsection_len, &key) < 0 || !subsection)
>>               return 0;
>>
>> +     subsection = xmemdupz(subsection, subsection_len);
>
> This appears to be leaked.

Good point, will fix.

Though the impact is negligible, as this code goes away the next
patch and there is a free included.

Thanks,
Stefan

>
> Thanks,
> Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config
  2016-02-04 22:09 ` [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config Stefan Beller
@ 2016-02-06  1:23   ` Jonathan Nieder
  2016-02-06  1:36     ` Stefan Beller
  0 siblings, 1 reply; 23+ messages in thread
From: Jonathan Nieder @ 2016-02-06  1:23 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git, gitster, Jens.Lehmann

Hi,

Stefan Beller wrote:

> This rewrites parse_config to distinguish between configs specific to
> one submodule and configs which apply generically to all submodules.
> We do not have generic submodule configs yet, but the next patch will
> introduce "submodule.fetchJobs".

Does this mean that options like submodule.fetchJobs can be specified
in either .gitmodules or .git/config, like the submodule.<name>.*
options?

I would expect a good value for fetchJobs to be machine-specific.
Would it work to put the submodule.fetchJobs handling in
submodule.c::submodule_config alongside fetch.recurseModules instead?

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config
  2016-02-06  1:23   ` Jonathan Nieder
@ 2016-02-06  1:36     ` Stefan Beller
  0 siblings, 0 replies; 23+ messages in thread
From: Stefan Beller @ 2016-02-06  1:36 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Junio C Hamano, Jens Lehmann

On Fri, Feb 5, 2016 at 5:23 PM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> Hi,
>
> Stefan Beller wrote:
>
>> This rewrites parse_config to distinguish between configs specific to
>> one submodule and configs which apply generically to all submodules.
>> We do not have generic submodule configs yet, but the next patch will
>> introduce "submodule.fetchJobs".
>
> Does this mean that options like submodule.fetchJobs can be specified
> in either .gitmodules or .git/config, like the submodule.<name>.*
> options?

oops, yes that would work.

I was diving down the stack when looking where to put the code for
submodule.fetchJobs, and as submodule.c::submodule_config
missguided me to submodule-config.c as it told me
'anything starting with "submodule." will be parsed in the
submodule-config file', so at the time of writing that felt natural to
put it there as well.

So generalizing from there, would we expect to
have any submodule options with no section in .gitmodules?

>
> I would expect a good value for fetchJobs to be machine-specific.
> Would it work to put the submodule.fetchJobs handling in
> submodule.c::submodule_config alongside fetch.recurseModules instead?

Sure that's possible. Although I would not mind having a preset default
from a project. Would it be possible for a project to be malicious with that?

(Answer yes it can, a superproject including itself multiple times and having
parallel set to >=2 behaves similar like a fork bomb when the parallelism is
not overwritten in the actual .git/config, I would think)

>
> Thanks,
> Jonathan

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2016-02-06  1:36 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-04 22:09 [PATCHv8 0/9] Expose submodule parallelism to the user Stefan Beller
2016-02-04 22:09 ` [PATCHv8 1/9] submodule-config: keep update strategy around Stefan Beller
2016-02-05  0:59   ` Jonathan Nieder
2016-02-05 20:25     ` Stefan Beller
2016-02-05 20:33       ` Jonathan Nieder
2016-02-05 20:36         ` Stefan Beller
2016-02-04 22:09 ` [PATCHv8 2/9] submodule-config: drop check against NULL Stefan Beller
2016-02-05  1:05   ` Jonathan Nieder
2016-02-04 22:09 ` [PATCHv8 3/9] submodule-config: remove name_and_item_from_var Stefan Beller
2016-02-06  0:46   ` Jonathan Nieder
2016-02-06  1:21     ` Stefan Beller
2016-02-04 22:09 ` [PATCHv8 4/9] submodule-config: slightly simplify lookup_or_create_by_name Stefan Beller
2016-02-06  0:48   ` Jonathan Nieder
2016-02-04 22:09 ` [PATCHv8 5/9] submodule-config: introduce parse_generic_submodule_config Stefan Beller
2016-02-06  1:23   ` Jonathan Nieder
2016-02-06  1:36     ` Stefan Beller
2016-02-04 22:09 ` [PATCHv8 6/9] fetching submodules: respect `submodule.fetchJobs` config option Stefan Beller
2016-02-05  3:29   ` Junio C Hamano
2016-02-05 18:50     ` Stefan Beller
2016-02-05 19:59       ` Junio C Hamano
2016-02-04 22:09 ` [PATCHv8 7/9] git submodule update: have a dedicated helper for cloning Stefan Beller
2016-02-04 22:09 ` [PATCHv8 8/9] submodule update: expose parallelism to the user Stefan Beller
2016-02-04 22:09 ` [PATCHv8 9/9] clone: allow an explicit argument for parallel submodule clones Stefan Beller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.