All of lore.kernel.org
 help / color / mirror / Atom feed
From: Calvin Wan <calvinwan@google.com>
To: git@vger.kernel.org
Cc: Calvin Wan <calvinwan@google.com>,
	avarab@gmail.com, chooglen@google.com, newren@gmail.com,
	jonathantanmy@google.com, phillip.wood123@gmail.com
Subject: [PATCH v8 0/6] submodule: parallelize diff
Date: Thu,  9 Feb 2023 00:02:06 +0000	[thread overview]
Message-ID: <20230209000212.1892457-1-calvinwan@google.com> (raw)
In-Reply-To: <20230207181706.363453-1-calvinwan@google.com>

Original cover letter for context:
https://lore.kernel.org/git/20221011232604.839941-1-calvinwan@google.com/

This reroll contains stylistic changes suggested by Avar and Phillip,
and includes a range-diff below.

Calvin Wan (6):
  run-command: add duplicate_output_fn to run_processes_parallel_opts
  submodule: strbuf variable rename
  submodule: move status parsing into function
  submodule: refactor is_submodule_modified()
  diff-lib: refactor out diff_change logic
  diff-lib: parallelize run_diff_files for submodules

 Documentation/config/submodule.txt |  12 ++
 diff-lib.c                         | 133 +++++++++++----
 run-command.c                      |  16 +-
 run-command.h                      |  25 +++
 submodule.c                        | 266 ++++++++++++++++++++++++-----
 submodule.h                        |   9 +
 t/helper/test-run-command.c        |  20 +++
 t/t0061-run-command.sh             |  39 +++++
 t/t4027-diff-submodule.sh          |  31 ++++
 t/t7506-status-submodule.sh        |  25 +++
 10 files changed, 497 insertions(+), 79 deletions(-)

Range-diff against v7:
1:  311b1abfbe ! 1:  5d51250c67 run-command: add duplicate_output_fn to run_processes_parallel_opts
    @@ run-command.c: static void pp_init(struct parallel_processes *pp,
      	if (!opts->get_next_task)
      		BUG("you need to specify a get_next_task function");
      
    -+	if (opts->duplicate_output && opts->ungroup)
    -+		BUG("duplicate_output and ungroup are incompatible with each other");
    ++	if (opts->ungroup) {
    ++		if (opts->duplicate_output)
    ++			BUG("duplicate_output and ungroup are incompatible with each other");
    ++	}
     +
      	CALLOC_ARRAY(pp->children, n);
      	if (!opts->ungroup)
    @@ run-command.c: static void pp_buffer_stderr(struct parallel_processes *pp,
     +			} else if (n < 0) {
      				if (errno != EAGAIN)
      					die_errno("read");
    -+			} else {
    -+				if (opts->duplicate_output)
    -+					opts->duplicate_output(&pp->children[i].err,
    -+					       strlen(pp->children[i].err.buf) - n,
    -+					       opts->data,
    -+					       pp->children[i].data);
    ++			} else if (opts->duplicate_output) {
    ++				opts->duplicate_output(&pp->children[i].err,
    ++					pp->children[i].err.len - n,
    ++					opts->data, pp->children[i].data);
     +			}
      		}
      	}
    @@ run-command.h: typedef int (*start_failure_fn)(struct strbuf *out,
     + *
     + * This function is incompatible with "ungroup"
     + */
    -+typedef void (*duplicate_output_fn)(struct strbuf *out,
    -+				    size_t offset,
    -+				    void *pp_cb,
    -+				    void *pp_task_cb);
    ++typedef void (*duplicate_output_fn)(struct strbuf *out, size_t offset,
    ++				    void *pp_cb, void *pp_task_cb);
     +
      /**
       * This callback is called on every child process that finished processing.
    @@ run-command.h: struct run_process_parallel_opts
      	start_failure_fn start_failure;
      
     +	/**
    -+	 * duplicate_output: See duplicate_output_fn() above. This should be
    -+	 * NULL unless process specific output is needed
    ++	 * duplicate_output: See duplicate_output_fn() above. Unless you need
    ++	 * to capture output from child processes, leave this as NULL.
     +	 */
     +	duplicate_output_fn duplicate_output;
     +
    @@ t/helper/test-run-command.c: static int no_job(struct child_process *cp,
     +			void *pp_task_cb UNUSED)
     +{
     +	struct string_list list = STRING_LIST_INIT_DUP;
    ++	struct string_list_item *item;
     +
     +	string_list_split(&list, out->buf + offset, '\n', -1);
    -+	for (size_t i = 0; i < list.nr; i++) {
    -+		if (strlen(list.items[i].string) > 0)
    -+			fprintf(stderr, "duplicate_output: %s\n", list.items[i].string);
    -+	}
    ++	for_each_string_list_item(item, &list)
    ++		fprintf(stderr, "duplicate_output: %s\n", item->string);
     +	string_list_clear(&list, 0);
     +}
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with a
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
2:  d00a18dd84 = 2:  6ded5b6788 submodule: strbuf variable rename
3:  dcda518922 = 3:  0c71cea8cd submodule: move status parsing into function
4:  c6fc5ba13b ! 4:  5c8cc93f9f submodule: refactor is_submodule_modified()
    @@ submodule.c: static int config_update_recurse_submodules = RECURSE_SUBMODULES_OF
      static int initialized_fetch_ref_tips;
      static struct oid_array ref_tips_before_fetch;
      static struct oid_array ref_tips_after_fetch;
    -+static const char *status_porcelain_start_error =
    -+	N_("could not run 'git status --porcelain=2' in submodule %s");
    -+static const char *status_porcelain_fail_error =
    -+	N_("'git status --porcelain=2' failed in submodule %s");
    ++#define STATUS_PORCELAIN_START_ERROR \
    ++	N_("could not run 'git status --porcelain=2' in submodule %s")
    ++#define STATUS_PORCELAIN_FAIL_ERROR \
    ++	N_("'git status --porcelain=2' failed in submodule %s")
      
      /*
       * Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	prepare_status_porcelain(&cp, path, ignore_untracked);
      	if (start_command(&cp))
     -		die(_("Could not run 'git status --porcelain=2' in submodule %s"), path);
    -+		die(_(status_porcelain_start_error), path);
    ++		die(_(STATUS_PORCELAIN_START_ERROR), path);
      
      	fp = xfdopen(cp.out, "r");
      	while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
      
      	if (finish_command(&cp) && !ignore_cp_exit_code)
     -		die(_("'git status --porcelain=2' failed in submodule %s"), path);
    -+		die(_(status_porcelain_fail_error), path);
    ++		die(_(STATUS_PORCELAIN_FAIL_ERROR), path);
      
      	strbuf_release(&buf);
      	return dirty_submodule;
5:  1ea8eae9c9 = 5:  6c2b62abc8 diff-lib: refactor out diff_change logic
6:  0d35fcc38d < -:  ---------- diff-lib: refactor match_stat_with_submodule
7:  fd1eec974d ! 6:  bb25dadbe5 diff-lib: parallelize run_diff_files for submodules
    @@ diff-lib.c: static int check_removed(const struct index_state *istate, const str
     +				     unsigned *ignore_untracked)
      {
      	int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option);
    - 	struct diff_flags orig_flags;
    +-	if (S_ISGITLINK(ce->ce_mode)) {
    +-		struct diff_flags orig_flags = diffopt->flags;
    +-		if (!diffopt->flags.override_submodule_config)
    +-			set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    +-		if (diffopt->flags.ignore_submodules)
    +-			changed = 0;
    +-		else if (!diffopt->flags.ignore_dirty_submodules &&
    +-			 (!changed || diffopt->flags.dirty_submodules))
    ++	struct diff_flags orig_flags;
     +	int defer = 0;
    - 
    - 	if (!S_ISGITLINK(ce->ce_mode))
    --		return changed;
    ++
    ++	if (!S_ISGITLINK(ce->ce_mode))
     +		goto ret;
    - 
    - 	orig_flags = diffopt->flags;
    - 	if (!diffopt->flags.override_submodule_config)
    -@@ diff-lib.c: static int match_stat_with_submodule(struct diff_options *diffopt,
    - 		goto cleanup;
    - 	}
    - 	if (!diffopt->flags.ignore_dirty_submodules &&
    --	    (!changed || diffopt->flags.dirty_submodules))
    --		*dirty_submodule = is_submodule_modified(ce->name,
    ++
    ++	orig_flags = diffopt->flags;
    ++	if (!diffopt->flags.override_submodule_config)
    ++		set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    ++	if (diffopt->flags.ignore_submodules) {
    ++		changed = 0;
    ++		goto cleanup;
    ++	}
    ++	if (!diffopt->flags.ignore_dirty_submodules &&
     +	    (!changed || diffopt->flags.dirty_submodules)) {
     +		if (defer_submodule_status && *defer_submodule_status) {
     +			defer = 1;
     +			*ignore_untracked = diffopt->flags.ignore_untracked_in_submodules;
     +		} else {
    -+			*dirty_submodule = is_submodule_modified(ce->name,
    - 					 diffopt->flags.ignore_untracked_in_submodules);
    + 			*dirty_submodule = is_submodule_modified(ce->name,
    +-								 diffopt->flags.ignore_untracked_in_submodules);
    +-		diffopt->flags = orig_flags;
    ++					 diffopt->flags.ignore_untracked_in_submodules);
     +		}
    -+	}
    - cleanup:
    - 	diffopt->flags = orig_flags;
    + 	}
    ++cleanup:
    ++	diffopt->flags = orig_flags;
     +ret:
     +	if (defer_submodule_status)
     +		*defer_submodule_status = defer;
    @@ diff-lib.c: int run_diff_files(struct rev_info *revs, unsigned int option)
      				       changed, istate, ce))
      			continue;
      	}
    -+	if (submodules.nr > 0) {
    -+		int parallel_jobs;
    -+		if (git_config_get_int("submodule.diffjobs", &parallel_jobs))
    ++	if (submodules.nr) {
    ++		unsigned long parallel_jobs;
    ++		struct string_list_item *item;
    ++
    ++		if (git_config_get_ulong("submodule.diffjobs", &parallel_jobs))
     +			parallel_jobs = 1;
     +		else if (!parallel_jobs)
     +			parallel_jobs = online_cpus();
    -+		else if (parallel_jobs < 0)
    -+			die(_("submodule.diffjobs cannot be negative"));
     +
     +		if (get_submodules_status(&submodules, parallel_jobs))
     +			die(_("submodule status failed"));
    -+		for (size_t i = 0; i < submodules.nr; i++) {
    -+			struct submodule_status_util *util = submodules.items[i].util;
    ++		for_each_string_list_item(item, &submodules) {
    ++			struct submodule_status_util *util = item->util;
     +
     +			if (diff_change_helper(&revs->diffopt, util->newmode,
     +				       util->dirty_submodule, util->changed,
    @@ submodule.c: int submodule_touches_in_range(struct repository *r,
     +	int result;
     +
     +	struct string_list *submodule_names;
    -+
    -+	/* Pending statuses by OIDs */
    -+	struct status_task **oid_status_tasks;
    -+	int oid_status_tasks_nr, oid_status_tasks_alloc;
     +};
     +
      struct submodule_parallel_fetch {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	struct status_task *task = task_cb;
     +
     +	sps->result = 1;
    -+	strbuf_addf(err,
    -+	    _(status_porcelain_start_error),
    -+	    task->path);
    ++	strbuf_addf(err, _(STATUS_PORCELAIN_START_ERROR), task->path);
     +	return 0;
     +}
     +
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +
     +	if (retvalue) {
     +		sps->result = 1;
    -+		strbuf_addf(err,
    -+		    _(status_porcelain_fail_error),
    -+		    task->path);
    ++		strbuf_addf(err, _(STATUS_PORCELAIN_FAIL_ERROR), task->path);
     +	}
     +
     +	parse_status_porcelain_strbuf(&task->out,
-- 
2.39.1.519.gcb327c4b5f-goog


  parent reply	other threads:[~2023-02-09  0:02 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <https://lore.kernel.org/git/20221108184200.2813458-1-calvinwan@google.com/>
2023-01-04 21:54 ` [PATCH v5 0/6] submodule: parallelize diff Calvin Wan
2023-01-05 23:23   ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 " Calvin Wan
2023-02-07 18:16     ` [PATCH v7 0/7] " Calvin Wan
2023-02-08  0:55       ` Ævar Arnfjörð Bjarmason
2023-02-09  0:02       ` Calvin Wan [this message]
2023-02-09  1:42         ` [PATCH v8 0/6] " Ævar Arnfjörð Bjarmason
2023-02-09 19:50         ` Junio C Hamano
2023-02-09 21:52           ` Calvin Wan
2023-02-09 22:25             ` Junio C Hamano
2023-02-10 13:24             ` Ævar Arnfjörð Bjarmason
2023-02-10 17:42               ` Junio C Hamano
2023-02-09 20:50         ` Phillip Wood
2023-03-02 21:52         ` [PATCH v9 " Calvin Wan
2023-03-02 22:02           ` [PATCH v9 1/6] run-command: add on_stderr_output_fn to run_processes_parallel_opts Calvin Wan
2023-03-02 22:02           ` [PATCH v9 2/6] submodule: rename strbuf variable Calvin Wan
2023-03-03  0:25             ` Junio C Hamano
2023-03-06 17:37               ` Calvin Wan
2023-03-06 18:30                 ` Junio C Hamano
2023-03-06 19:00                   ` Calvin Wan
2023-03-02 22:02           ` [PATCH v9 3/6] submodule: move status parsing into function Calvin Wan
2023-03-17 20:42             ` Glen Choo
2023-03-02 22:02           ` [PATCH v9 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-03-02 22:02           ` [PATCH v9 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-03-02 22:02           ` [PATCH v9 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-03-07  8:41             ` Ævar Arnfjörð Bjarmason
2023-03-07 10:21             ` Ævar Arnfjörð Bjarmason
2023-03-07 17:55               ` Junio C Hamano
2023-03-17  1:09             ` Glen Choo
2023-03-17  2:51               ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-13  6:34         ` Glen Choo
2023-02-13 17:52           ` Junio C Hamano
2023-02-13 18:26             ` Calvin Wan
2023-02-09  0:02       ` [PATCH v8 2/6] submodule: strbuf variable rename Calvin Wan
2023-02-13  8:37         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 3/6] submodule: move status parsing into function Calvin Wan
2023-02-09  0:02       ` [PATCH v8 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-13  7:06         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-09  1:48         ` Ævar Arnfjörð Bjarmason
2023-02-13  8:42         ` Glen Choo
2023-02-13 18:29           ` Calvin Wan
2023-02-14  4:03             ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-13  8:36         ` Glen Choo
2023-02-07 18:17     ` [PATCH v7 1/7] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-07 22:16       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:50         ` Calvin Wan
2023-02-08 14:19       ` Phillip Wood
2023-02-08 22:54         ` Calvin Wan
2023-02-09 20:37           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 2/7] submodule: strbuf variable rename Calvin Wan
2023-02-07 22:47       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:59         ` Calvin Wan
2023-02-07 18:17     ` [PATCH v7 3/7] submodule: move status parsing into function Calvin Wan
2023-02-07 18:17     ` [PATCH v7 4/7] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-07 22:59       ` Ævar Arnfjörð Bjarmason
2023-02-07 18:17     ` [PATCH v7 5/7] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-08 14:28       ` Phillip Wood
2023-02-08 23:12         ` Calvin Wan
2023-02-09 20:53           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 6/7] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-02-08  8:18       ` Ævar Arnfjörð Bjarmason
2023-02-08 17:07         ` Phillip Wood
2023-02-08 23:13           ` Calvin Wan
2023-02-08 14:22       ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 7/7] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-07 23:06       ` Ævar Arnfjörð Bjarmason
2023-01-17 19:30   ` [PATCH v6 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-17 19:30   ` [PATCH v6 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-17 19:30   ` [PATCH v6 3/6] submodule: move status parsing into function Calvin Wan
2023-01-17 19:30   ` [PATCH v6 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-17 19:30   ` [PATCH v6 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-26  9:09     ` Glen Choo
2023-01-26  9:16     ` Glen Choo
2023-01-26 18:52       ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 6/6] submodule: call parallel code from serial status Calvin Wan
2023-01-26  8:09     ` Glen Choo
2023-01-26  8:45       ` Glen Choo
2023-01-04 21:54 ` [PATCH v5 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-04 21:54 ` [PATCH v5 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-04 21:54 ` [PATCH v5 3/6] submodule: move status parsing into function Calvin Wan
2023-01-04 21:54 ` [PATCH v5 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-04 21:54 ` [PATCH v5 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-04 21:54 ` [PATCH v5 6/6] submodule: call parallel code from serial status Calvin Wan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230209000212.1892457-1-calvinwan@google.com \
    --to=calvinwan@google.com \
    --cc=avarab@gmail.com \
    --cc=chooglen@google.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=newren@gmail.com \
    --cc=phillip.wood123@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.