git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: peff@peff.net, dstolee@microsoft.com, szeder.dev@gmail.com
Subject: [PATCH v2 08/14] bloom: use provided 'struct bloom_filter_settings'
Date: Wed, 5 Aug 2020 13:02:42 -0400	[thread overview]
Message-ID: <4deb724fc13e580430628f0ac408685394c8689f.1596646576.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1596646576.git.me@ttaylorr.com>

When 'get_or_compute_bloom_filter()' needs to compute a Bloom filter
from scratch, it looks to the default 'struct bloom_filter_settings' in
order to determine the maximum number of changed paths, number of bits
per entry, and so on.

All of these values have so far been constant, and so there was no need
to pass in a pointer from the caller (eg., the one that is stored in the
'struct write_commit_graph_context').

Start passing in a 'struct bloom_filter_settings *' instead of using the
default values to respect graph-specific settings (eg., in the case of
setting 'GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS').

In order to have an initialized value for these settings, move its
initialization to earlier in the commit-graph write.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 bloom.c               | 13 ++++++-------
 bloom.h               |  3 ++-
 commit-graph.c        | 21 ++++++++++-----------
 t/helper/test-bloom.c |  1 +
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/bloom.c b/bloom.c
index a8a21762f4..0cf1962dc5 100644
--- a/bloom.c
+++ b/bloom.c
@@ -180,13 +180,12 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
 struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 						 struct commit *c,
 						 int compute_if_not_present,
+						 const struct bloom_filter_settings *settings,
 						 int *computed)
 {
 	struct bloom_filter *filter;
-	struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
 	int i;
 	struct diff_options diffopt;
-	int max_changes = 512;
 
 	if (computed)
 		*computed = 0;
@@ -211,7 +210,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 	repo_diff_setup(r, &diffopt);
 	diffopt.flags.recursive = 1;
 	diffopt.detect_rename = 0;
-	diffopt.max_changes = max_changes;
+	diffopt.max_changes = settings->max_changed_paths;
 	diff_setup_done(&diffopt);
 
 	/* ensure commit is parsed so we have parent information */
@@ -223,7 +222,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 		diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
 	diffcore_std(&diffopt);
 
-	if (diffopt.num_changes <= max_changes) {
+	if (diffopt.num_changes <= settings->max_changed_paths) {
 		struct hashmap pathmap;
 		struct pathmap_hash_entry *e;
 		struct hashmap_iter iter;
@@ -260,13 +259,13 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 			diff_free_filepair(diff_queued_diff.queue[i]);
 		}
 
-		filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+		filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
 		filter->data = xcalloc(filter->len, sizeof(unsigned char));
 
 		hashmap_for_each_entry(&pathmap, &iter, e, entry) {
 			struct bloom_key key;
-			fill_bloom_key(e->path, strlen(e->path), &key, &settings);
-			add_key_to_filter(&key, filter, &settings);
+			fill_bloom_key(e->path, strlen(e->path), &key, settings);
+			add_key_to_filter(&key, filter, settings);
 		}
 
 		hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry);
diff --git a/bloom.h b/bloom.h
index c4107a6415..2353c74a24 100644
--- a/bloom.h
+++ b/bloom.h
@@ -92,11 +92,12 @@ void init_bloom_filters(void);
 struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 						 struct commit *c,
 						 int compute_if_not_present,
+						 const struct bloom_filter_settings *settings,
 						 int *computed);
 
 #define DEFAULT_BLOOM_MAX_CHANGES 512
 #define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
-	(r), (c), 0, NULL)
+	(r), (c), 0, NULL, NULL)
 
 int bloom_filter_contains(const struct bloom_filter *filter,
 			  const struct bloom_key *key,
diff --git a/commit-graph.c b/commit-graph.c
index ba2a2cfb22..48d4697f54 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1432,6 +1432,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 			ctx->r,
 			c,
 			1,
+			ctx->bloom_settings,
 			&computed);
 		if (computed) {
 			ctx->count_bloom_filter_computed++;
@@ -1688,17 +1689,6 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 	int num_chunks = 3;
 	uint64_t chunk_offset;
 	struct object_id file_hash;
-	struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
-
-	if (!ctx->bloom_settings) {
-		bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
-							      bloom_settings.bits_per_entry);
-		bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
-							  bloom_settings.num_hashes);
-		bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
-							  bloom_settings.max_changed_paths);
-		ctx->bloom_settings = &bloom_settings;
-	}
 
 	if (ctx->split) {
 		struct strbuf tmp_file = STRBUF_INIT;
@@ -2144,6 +2134,7 @@ int write_commit_graph(struct object_directory *odb,
 	uint32_t i, count_distinct = 0;
 	int res = 0;
 	int replace = 0;
+	struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
 
 	if (!commit_graph_compatible(the_repository))
 		return 0;
@@ -2157,6 +2148,14 @@ int write_commit_graph(struct object_directory *odb,
 	ctx->split_opts = split_opts;
 	ctx->total_bloom_filter_data_size = 0;
 
+	bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
+						      bloom_settings.bits_per_entry);
+	bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
+						  bloom_settings.num_hashes);
+	bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
+							 bloom_settings.max_changed_paths);
+	ctx->bloom_settings = &bloom_settings;
+
 	if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
 		ctx->changed_paths = 1;
 	if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c
index 531af439c2..4af949164c 100644
--- a/t/helper/test-bloom.c
+++ b/t/helper/test-bloom.c
@@ -40,6 +40,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
 	setup_git_directory();
 	c = lookup_commit(the_repository, commit_oid);
 	filter = get_or_compute_bloom_filter(the_repository, c, 1,
+					     &settings,
 					     NULL);
 	print_bloom_filter(filter);
 }
-- 
2.28.0.rc1.13.ge78abce653


  parent reply	other threads:[~2020-08-05 17:05 UTC|newest]

Thread overview: 117+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-03 18:57 [PATCH 00/10] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-03 18:57 ` [PATCH 01/10] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-04  7:24   ` Jeff King
2020-08-04 20:08     ` Taylor Blau
2020-08-03 18:57 ` [PATCH 02/10] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-03 18:57 ` [PATCH 03/10] t4216: use an '&&'-chain Taylor Blau
2020-08-03 18:57 ` [PATCH 04/10] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-03 18:57 ` [PATCH 05/10] commit-graph: respect 'commitgraph.readChangedPaths' Taylor Blau
2020-08-03 18:57 ` [PATCH 06/10] commit-graph.c: sort index into commits list Taylor Blau
2020-08-04 12:31   ` Derrick Stolee
2020-08-04 20:10     ` Taylor Blau
2020-08-03 18:57 ` [PATCH 07/10] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-03 18:59   ` Taylor Blau
2020-08-04 12:57   ` Derrick Stolee
2020-08-03 18:57 ` [PATCH 08/10] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-04 13:00   ` Derrick Stolee
2020-08-04 20:12     ` Taylor Blau
2020-08-03 18:57 ` [PATCH 09/10] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-03 18:57 ` [PATCH 10/10] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-08-04 13:03   ` Derrick Stolee
2020-08-04 20:14     ` Taylor Blau
2020-08-05 17:01 ` [PATCH v2 00/14] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-05 17:01   ` [PATCH v2 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-05 17:02   ` [PATCH v2 02/14] t4216: use an '&&'-chain Taylor Blau
2020-08-05 17:02   ` [PATCH v2 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-05 17:02   ` [PATCH v2 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-05 17:02   ` [PATCH v2 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-08-05 17:02   ` [PATCH v2 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-08-05 17:02   ` [PATCH v2 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-05 17:02   ` Taylor Blau [this message]
2020-08-05 17:02   ` [PATCH v2 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-08-05 17:02   ` [PATCH v2 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-08-05 17:02   ` [PATCH v2 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-08-05 17:02   ` [PATCH v2 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-05 21:01     ` Junio C Hamano
2020-08-05 21:17       ` Taylor Blau
2020-08-05 22:21         ` Junio C Hamano
2020-08-05 22:25           ` Taylor Blau
2020-08-11 13:48             ` Taylor Blau
2020-08-11 18:59               ` Junio C Hamano
2020-08-05 17:03   ` [PATCH v2 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-05 17:03   ` [PATCH v2 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-08-11 20:51 ` [PATCH v3 00/14] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-11 20:51   ` [PATCH v3 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-11 21:18     ` SZEDER Gábor
2020-08-11 21:21       ` Taylor Blau
2020-08-11 21:27         ` SZEDER Gábor
2020-08-11 21:34           ` Taylor Blau
2020-08-11 23:55             ` SZEDER Gábor
2020-08-12 11:48               ` Derrick Stolee
2020-08-14 20:17                 ` Taylor Blau
2020-08-11 20:51   ` [PATCH v3 02/14] t4216: use an '&&'-chain Taylor Blau
2020-08-11 20:51   ` [PATCH v3 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-11 20:51   ` [PATCH v3 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-11 20:51   ` [PATCH v3 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-08-11 20:51   ` [PATCH v3 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-08-11 20:51   ` [PATCH v3 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-11 20:51   ` [PATCH v3 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-08-11 20:51   ` [PATCH v3 08/14] bloom: use provided 'struct bloom_filter_settings' Taylor Blau
2020-08-11 20:51   ` [PATCH v3 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-08-11 20:52   ` [PATCH v3 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-08-11 20:52   ` [PATCH v3 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-11 21:11     ` Derrick Stolee
2020-08-11 21:18       ` Taylor Blau
2020-08-11 22:05         ` Taylor Blau
2020-08-19 13:35     ` SZEDER Gábor
2020-09-02 20:23       ` Taylor Blau
2020-09-01 14:35     ` SZEDER Gábor
2020-09-02 20:40       ` Taylor Blau
2020-08-11 20:52   ` [PATCH v3 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-19  9:56     ` SZEDER Gábor
2020-09-02 21:02       ` Taylor Blau
2020-08-11 20:52   ` [PATCH v3 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-08-12 11:49     ` SZEDER Gábor
2020-08-14 20:20       ` Taylor Blau
2020-08-17 22:50         ` SZEDER Gábor
2020-09-02 21:03           ` Taylor Blau
2020-08-12 12:29     ` Derrick Stolee
2020-08-14 20:10       ` Taylor Blau
2020-08-18 22:23     ` SZEDER Gábor
2020-09-03 16:35       ` Taylor Blau
2020-08-19  8:20     ` SZEDER Gábor
2020-09-03 16:42       ` Taylor Blau
2020-09-04  8:50         ` SZEDER Gábor
2020-09-01 14:36     ` SZEDER Gábor
2020-09-03 18:49       ` Taylor Blau
2020-09-03 21:45   ` [PATCH v3 00/14] more miscellaneous Bloom filter improvements Junio C Hamano
2020-09-03 22:33     ` Taylor Blau
2020-09-03 22:45 ` [PATCH v4 " Taylor Blau
2020-09-03 22:46   ` [PATCH v4 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-09-03 22:46   ` [PATCH v4 02/14] t4216: use an '&&'-chain Taylor Blau
2020-09-03 22:46   ` [PATCH v4 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-09-03 22:46   ` [PATCH v4 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-09-03 22:46   ` [PATCH v4 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-09-03 22:46   ` [PATCH v4 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-09-03 22:46   ` [PATCH v4 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-09-05 17:22     ` Jakub Narębski
2020-09-05 17:38       ` Taylor Blau
2020-09-05 17:50         ` Jakub Narębski
2020-09-05 18:01           ` Taylor Blau
2020-09-05 18:18             ` Jakub Narębski
2020-09-05 18:38               ` Taylor Blau
2020-09-05 18:55                 ` Taylor Blau
2020-09-05 19:04                   ` SZEDER Gábor
2020-09-05 19:49                     ` Taylor Blau
2020-09-06 21:52                       ` Junio C Hamano
2020-09-03 22:46   ` [PATCH v4 08/14] bloom: use provided 'struct bloom_filter_settings' Taylor Blau
2020-09-03 22:46   ` [PATCH v4 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-09-03 22:46   ` [PATCH v4 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-09-03 22:46   ` [PATCH v4 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-09-04 20:18     ` René Scharfe
2020-09-04 20:22       ` Taylor Blau
2020-09-03 22:46   ` [PATCH v4 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-09-03 22:46   ` [PATCH v4 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-09-04 15:20     ` Taylor Blau
2020-09-03 22:47   ` [PATCH v4 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-09-04 14:39   ` [PATCH v4 00/14] more miscellaneous Bloom filter improvements Derrick Stolee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4deb724fc13e580430628f0ac408685394c8689f.1596646576.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    --cc=szeder.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).