All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH 4/7] merge-ort: switch our strmaps over to using memory pools
Date: Fri, 23 Jul 2021 12:54:54 +0000	[thread overview]
Message-ID: <8231c8e34cd35cc648f87d31e48b0692dd2fe6cd.1627044897.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.990.git.1627044897.gitgitgadget@gmail.com>

From: Elijah Newren <newren@gmail.com>

For all the strmaps (including strintmaps and strsets) whose memory is
unconditionally freed as part of clear_or_reinit_internal_opts(), switch
them over to using our new memory pool.

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:      202.5  ms ±  3.2  ms    198.1 ms ±  2.6 ms
    mega-renames:      1.072 s ±  0.012 s    715.8 ms ±  4.0 ms
    just-one-mega:   357.3  ms ±  3.9  ms    276.8 ms ±  4.2 ms

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 125 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 75 insertions(+), 50 deletions(-)

diff --git a/merge-ort.c b/merge-ort.c
index 2bca4b71f2a..5fd2a4ccd35 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -539,15 +539,19 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 	void (*strset_func)(struct strset *) =
 		reinitialize ? strset_partial_clear : strset_clear;
 
-	/*
-	 * We marked opti->paths with strdup_strings = 0, so that we
-	 * wouldn't have to make another copy of the fullpath created by
-	 * make_traverse_path from setup_path_info().  But, now that we've
-	 * used it and have no other references to these strings, it is time
-	 * to deallocate them.
-	 */
-	free_strmap_strings(&opti->paths);
-	strmap_func(&opti->paths, 1);
+	if (opti->pool)
+		strmap_func(&opti->paths, 0);
+	else {
+		/*
+		 * We marked opti->paths with strdup_strings = 0, so that
+		 * we wouldn't have to make another copy of the fullpath
+		 * created by make_traverse_path from setup_path_info().
+		 * But, now that we've used it and have no other references
+		 * to these strings, it is time to deallocate them.
+		 */
+		free_strmap_strings(&opti->paths);
+		strmap_func(&opti->paths, 1);
+	}
 
 	/*
 	 * All keys and values in opti->conflicted are a subset of those in
@@ -556,16 +560,19 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 	 */
 	strmap_func(&opti->conflicted, 0);
 
-	/*
-	 * opti->paths_to_free is similar to opti->paths; we created it with
-	 * strdup_strings = 0 to avoid making _another_ copy of the fullpath
-	 * but now that we've used it and have no other references to these
-	 * strings, it is time to deallocate them.  We do so by temporarily
-	 * setting strdup_strings to 1.
-	 */
-	opti->paths_to_free.strdup_strings = 1;
-	string_list_clear(&opti->paths_to_free, 0);
-	opti->paths_to_free.strdup_strings = 0;
+	if (!opti->pool) {
+		/*
+		 * opti->paths_to_free is similar to opti->paths; we
+		 * created it with strdup_strings = 0 to avoid making
+		 * _another_ copy of the fullpath but now that we've used
+		 * it and have no other references to these strings, it is
+		 * time to deallocate them.  We do so by temporarily
+		 * setting strdup_strings to 1.
+		 */
+		opti->paths_to_free.strdup_strings = 1;
+		string_list_clear(&opti->paths_to_free, 0);
+		opti->paths_to_free.strdup_strings = 0;
+	}
 
 	if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
 		discard_index(&opti->attr_index);
@@ -683,7 +690,6 @@ static void path_msg(struct merge_options *opt,
 	strbuf_addch(sb, '\n');
 }
 
-MAYBE_UNUSED
 static void *pool_calloc(struct mem_pool *pool, size_t count, size_t size)
 {
 	if (!pool)
@@ -691,7 +697,6 @@ static void *pool_calloc(struct mem_pool *pool, size_t count, size_t size)
 	return mem_pool_calloc(pool, count, size);
 }
 
-MAYBE_UNUSED
 static void *pool_alloc(struct mem_pool *pool, size_t size)
 {
 	if (!pool)
@@ -699,7 +704,6 @@ static void *pool_alloc(struct mem_pool *pool, size_t size)
 	return mem_pool_alloc(pool, size);
 }
 
-MAYBE_UNUSED
 static void *pool_strndup(struct mem_pool *pool, const char *str, size_t len)
 {
 	if (!pool)
@@ -835,8 +839,9 @@ static void setup_path_info(struct merge_options *opt,
 	assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
 	assert(resolved == (merged_version != NULL));
 
-	mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
-				   sizeof(struct conflict_info));
+	mi = pool_calloc(opt->priv->pool, 1,
+			 resolved ? sizeof(struct merged_info) :
+				    sizeof(struct conflict_info));
 	mi->directory_name = current_dir_name;
 	mi->basename_offset = current_dir_name_len;
 	mi->clean = !!resolved;
@@ -1128,7 +1133,7 @@ static int collect_merge_info_callback(int n,
 	len = traverse_path_len(info, p->pathlen);
 
 	/* +1 in both of the following lines to include the NUL byte */
-	fullpath = xmalloc(len + 1);
+	fullpath = pool_alloc(opt->priv->pool, len + 1);
 	make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
 
 	/*
@@ -1383,7 +1388,7 @@ static int handle_deferred_entries(struct merge_options *opt,
 		copy = renames->deferred[side].possible_trivial_merges;
 		strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
 					    0,
-					    NULL,
+					    opt->priv->pool,
 					    0);
 		strintmap_for_each_entry(&copy, &iter, entry) {
 			const char *path = entry->key;
@@ -2335,12 +2340,21 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
 	VERIFY_CI(ci);
 
 	/* Find parent directories missing from opt->priv->paths */
-	cur_path = new_path;
+	if (opt->priv->pool) {
+		cur_path = mem_pool_strdup(opt->priv->pool, new_path);
+		free((char*)new_path);
+		new_path = (char *)cur_path;
+	} else {
+		cur_path = new_path;
+	}
+
 	while (1) {
 		/* Find the parent directory of cur_path */
 		char *last_slash = strrchr(cur_path, '/');
 		if (last_slash) {
-			parent_name = xstrndup(cur_path, last_slash - cur_path);
+			parent_name = pool_strndup(opt->priv->pool,
+						   cur_path,
+						   last_slash - cur_path);
 		} else {
 			parent_name = opt->priv->toplevel_dir;
 			break;
@@ -2349,7 +2363,8 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
 		/* Look it up in opt->priv->paths */
 		entry = strmap_get_entry(&opt->priv->paths, parent_name);
 		if (entry) {
-			free((char*)parent_name);
+			if (!opt->priv->pool)
+				free((char*)parent_name);
 			parent_name = entry->key; /* reuse known pointer */
 			break;
 		}
@@ -2376,12 +2391,15 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
 		parent_name = cur_dir;
 	}
 
-	/*
-	 * We are removing old_path from opt->priv->paths.  old_path also will
-	 * eventually need to be freed, but it may still be used by e.g.
-	 * ci->pathnames.  So, store it in another string-list for now.
-	 */
-	string_list_append(&opt->priv->paths_to_free, old_path);
+	if (!opt->priv->pool) {
+		/*
+		 * We are removing old_path from opt->priv->paths.
+		 * old_path also will eventually need to be freed, but it
+		 * may still be used by e.g.  ci->pathnames.  So, store it
+		 * in another string-list for now.
+		 */
+		string_list_append(&opt->priv->paths_to_free, old_path);
+	}
 
 	assert(ci->filemask == 2 || ci->filemask == 4);
 	assert(ci->dirmask == 0);
@@ -2416,7 +2434,8 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
 		new_ci->stages[index].mode = ci->stages[index].mode;
 		oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
 
-		free(ci);
+		if (!opt->priv->pool)
+			free(ci);
 		ci = new_ci;
 	}
 
@@ -3623,7 +3642,8 @@ static void process_entry(struct merge_options *opt,
 		 * the directory to remain here, so we need to move this
 		 * path to some new location.
 		 */
-		CALLOC_ARRAY(new_ci, 1);
+		new_ci = pool_calloc(opt->priv->pool, 1, sizeof(*new_ci));
+
 		/* We don't really want new_ci->merged.result copied, but it'll
 		 * be overwritten below so it doesn't matter.  We also don't
 		 * want any directory mode/oid values copied, but we'll zero
@@ -3713,7 +3733,7 @@ static void process_entry(struct merge_options *opt,
 			const char *a_path = NULL, *b_path = NULL;
 			int rename_a = 0, rename_b = 0;
 
-			new_ci = xmalloc(sizeof(*new_ci));
+			new_ci = pool_alloc(opt->priv->pool, sizeof(*new_ci));
 
 			if (S_ISREG(a_mode))
 				rename_a = 1;
@@ -3777,12 +3797,14 @@ static void process_entry(struct merge_options *opt,
 				strmap_remove(&opt->priv->paths, path, 0);
 				/*
 				 * We removed path from opt->priv->paths.  path
-				 * will also eventually need to be freed, but
-				 * it may still be used by e.g.  ci->pathnames.
-				 * So, store it in another string-list for now.
+				 * will also eventually need to be freed if not
+				 * part of a memory pool...but it may still be
+				 * used by e.g. ci->pathnames.  So, store it in
+				 * another string-list for now in that case.
 				 */
-				string_list_append(&opt->priv->paths_to_free,
-						   path);
+				if (!opt->priv->pool)
+					string_list_append(&opt->priv->paths_to_free,
+							   path);
 			}
 
 			/*
@@ -4322,6 +4344,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 {
 	struct rename_info *renames;
 	int i;
+	struct mem_pool *pool = NULL;
 
 	/* Sanity checks on opt */
 	trace2_region_enter("merge", "sanity checks", opt->repo);
@@ -4393,9 +4416,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 #else
 	opt->priv->pool = NULL;
 #endif
+	pool = opt->priv->pool;
 	for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
 		strintmap_init_with_options(&renames->dirs_removed[i],
-					    NOT_RELEVANT, NULL, 0);
+					    NOT_RELEVANT, pool, 0);
 		strmap_init_with_options(&renames->dir_rename_count[i],
 					 NULL, 1);
 		strmap_init_with_options(&renames->dir_renames[i],
@@ -4409,7 +4433,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 		 */
 		strintmap_init_with_options(&renames->relevant_sources[i],
 					    -1 /* explicitly invalid */,
-					    NULL, 0);
+					    pool, 0);
 		strmap_init_with_options(&renames->cached_pairs[i],
 					 NULL, 1);
 		strset_init_with_options(&renames->cached_irrelevant[i],
@@ -4419,9 +4443,9 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 	}
 	for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
 		strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges,
-					    0, NULL, 0);
+					    0, pool, 0);
 		strset_init_with_options(&renames->deferred[i].target_dirs,
-					 NULL, 1);
+					 pool, 1);
 		renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
 	}
 
@@ -4434,9 +4458,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 	 * In contrast, conflicted just has a subset of keys from paths, so
 	 * we don't want to free those (it'd be a duplicate free).
 	 */
-	strmap_init_with_options(&opt->priv->paths, NULL, 0);
-	strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
-	string_list_init(&opt->priv->paths_to_free, 0);
+	strmap_init_with_options(&opt->priv->paths, pool, 0);
+	strmap_init_with_options(&opt->priv->conflicted, pool, 0);
+	if (!opt->priv->pool)
+		string_list_init(&opt->priv->paths_to_free, 0);
 
 	/*
 	 * keys & strbufs in output will sometimes need to outlive "paths",
-- 
gitgitgadget


  parent reply	other threads:[~2021-07-23 12:55 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-23 12:54 [PATCH 0/7] Final optimization batch (#15): use memory pools Elijah Newren via GitGitGadget
2021-07-23 12:54 ` [PATCH 1/7] diffcore-rename: use a mem_pool for exact rename detection's hashmap Elijah Newren via GitGitGadget
2021-07-23 21:59   ` Eric Sunshine
2021-07-23 22:03     ` Elijah Newren
2021-07-23 12:54 ` [PATCH 2/7] merge-ort: set up a memory pool Elijah Newren via GitGitGadget
2021-07-23 12:54 ` [PATCH 3/7] merge-ort: add pool_alloc, pool_calloc, and pool_strndup wrappers Elijah Newren via GitGitGadget
2021-07-23 22:07   ` Eric Sunshine
2021-07-26 14:36   ` Derrick Stolee
2021-07-28 22:49     ` Elijah Newren
2021-07-29 15:26       ` Jeff King
2021-07-30  2:27         ` Elijah Newren
2021-07-30 16:12           ` Jeff King
2021-07-23 12:54 ` Elijah Newren via GitGitGadget [this message]
2021-07-23 12:54 ` [PATCH 5/7] diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc Elijah Newren via GitGitGadget
2021-07-23 12:54 ` [PATCH 6/7] merge-ort: store filepairs and filespecs in our mem_pool Elijah Newren via GitGitGadget
2021-07-23 12:54 ` [PATCH 7/7] merge-ort: reuse path strings in pool_alloc_filespec Elijah Newren via GitGitGadget
2021-07-26 14:44 ` [PATCH 0/7] Final optimization batch (#15): use memory pools Derrick Stolee
2021-07-28 22:52   ` Elijah Newren
2021-07-29  3:58 ` [PATCH v2 " Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 1/7] diffcore-rename: use a mem_pool for exact rename detection's hashmap Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 2/7] merge-ort: add pool_alloc, pool_calloc, and pool_strndup wrappers Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 3/7] merge-ort: set up a memory pool Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 4/7] merge-ort: switch our strmaps over to using memory pools Elijah Newren via GitGitGadget
2021-07-29 15:28     ` Jeff King
2021-07-29 18:37       ` Elijah Newren
2021-07-29 20:09         ` Jeff King
2021-07-30  2:30           ` Elijah Newren
2021-07-30 16:12             ` Jeff King
2021-07-30 13:30           ` Ævar Arnfjörð Bjarmason
2021-07-30 14:36             ` Elijah Newren
2021-07-30 16:23               ` Ævar Arnfjörð Bjarmason
2021-07-29  3:58   ` [PATCH v2 5/7] diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 6/7] merge-ort: store filepairs and filespecs in our mem_pool Elijah Newren via GitGitGadget
2021-07-29  3:58   ` [PATCH v2 7/7] merge-ort: reuse path strings in pool_alloc_filespec Elijah Newren via GitGitGadget
2021-07-29 14:58   ` [PATCH v2 0/7] Final optimization batch (#15): use memory pools Derrick Stolee
2021-07-29 16:20   ` Jeff King
2021-07-29 16:23     ` Jeff King
2021-07-29 19:46       ` Junio C Hamano
2021-07-29 20:48         ` Junio C Hamano
2021-07-29 21:05           ` Elijah Newren
2021-07-29 20:46     ` Elijah Newren
2021-07-29 21:14       ` Jeff King
2021-07-30 11:47   ` [PATCH v3 0/9] " Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 1/9] merge-ort: rename str{map,intmap,set}_func() Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 2/9] diffcore-rename: use a mem_pool for exact rename detection's hashmap Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 3/9] merge-ort: add pool_alloc, pool_calloc, and pool_strndup wrappers Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 4/9] merge-ort: set up a memory pool Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 5/9] merge-ort: switch our strmaps over to using memory pools Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 6/9] diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 7/9] merge-ort: store filepairs and filespecs in our mem_pool Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 8/9] merge-ort: reuse path strings in pool_alloc_filespec Elijah Newren via GitGitGadget
2021-07-30 11:47     ` [PATCH v3 9/9] merge-ort: remove compile-time ability to turn off usage of memory pools Elijah Newren via GitGitGadget
2021-07-30 16:24       ` Jeff King
2021-07-31 17:27     ` [PATCH v4 0/9] Final optimization batch (#15): use " Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 1/9] merge-ort: rename str{map,intmap,set}_func() Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 2/9] diffcore-rename: use a mem_pool for exact rename detection's hashmap Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 3/9] merge-ort: add pool_alloc, pool_calloc, and pool_strndup wrappers Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 4/9] merge-ort: set up a memory pool Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 5/9] merge-ort: switch our strmaps over to using memory pools Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 6/9] diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 7/9] merge-ort: store filepairs and filespecs in our mem_pool Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 8/9] merge-ort: reuse path strings in pool_alloc_filespec Elijah Newren via GitGitGadget
2021-07-31 17:27       ` [PATCH v4 9/9] merge-ort: remove compile-time ability to turn off usage of memory pools Elijah Newren via GitGitGadget
2021-08-02 15:27       ` [PATCH v4 0/9] Final optimization batch (#15): use " Derrick Stolee
2021-08-03 15:45       ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8231c8e34cd35cc648f87d31e48b0692dd2fe6cd.1627044897.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.