git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "SZEDER Gábor" <szeder.dev@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Garima Singh" <garima.singh@microsoft.com>,
	"Derrick Stolee" <stolee@gmail.com>,
	"Jakub Narebski" <jnareb@gmail.com>, "Jeff King" <peff@peff.net>,
	"Taylor Blau" <me@ttaylorr.com>,
	"SZEDER Gábor" <szeder.dev@gmail.com>
Subject: [PATCH 18/34] commit-graph: write "empty" Modified Path Bloom Filter Index chunk
Date: Fri, 29 May 2020 10:50:22 +0200	[thread overview]
Message-ID: <20200529085038.26008-19-szeder.dev@gmail.com> (raw)
In-Reply-To: <20200529085038.26008-1-szeder.dev@gmail.com>

Write the Modifed Path Bloom Filter Index chunk if the
'core.modifiedPathBloomFilters' configuration variable is set.  For
now store only "no Bloom filter for this commit" entries.

The format of the various modified path Bloom filter-related chunks
are not (yet) compatible with split commit-graph files, so ignore the
config setting and don't write modifed path Bloom filters with
'--split'.
---
 Documentation/config/core.txt |  6 ++++++
 commit-graph.c                | 40 +++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 9e440b160d..820f7d3bcf 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -588,6 +588,12 @@ core.commitGraph::
 	to parse the graph structure of commits. Defaults to true. See
 	linkgit:git-commit-graph[1] for more information.
 
+core.modifiedPathBloomFilters::
+	EXPERIMENTAL!!
+	If true, then Git will store and use Bloom filters in the
+	commit-graph file to speed up pathspec-limited revision walks.
+	Defaults to false.
+
 core.useReplaceRefs::
 	If set to `false`, behave as if the `--no-replace-objects`
 	option was given on the command line. See linkgit:git[1] and
diff --git a/commit-graph.c b/commit-graph.c
index e3adffd58b..28f147a418 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -21,6 +21,7 @@
 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
+#define GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_INDEX 0x4d504249 /* "MPBI" */
 
 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
 
@@ -33,6 +34,10 @@
 
 #define GRAPH_LAST_EDGE 0x80000000
 
+#define GRAPH_MODIFIED_PATH_BLOOM_FILTER_NONE 0xffffffffffffffff
+
+#define GRAPH_MODIFIED_PATH_BLOOM_FILTER_DEFAULT_NR_HASHES 7
+
 #define GRAPH_HEADER_SIZE 8
 #define GRAPH_FANOUT_SIZE (4 * 256)
 #define GRAPH_CHUNKLOOKUP_WIDTH 12
@@ -791,6 +796,11 @@ struct write_commit_graph_context {
 		 check_oids:1;
 
 	const struct split_commit_graph_opts *split_opts;
+
+	struct modified_path_bloom_filter_context {
+		unsigned use_modified_path_bloom_filters:1;
+		unsigned int num_hashes;
+	} mpbfctx;
 };
 
 static int write_graph_chunk_fanout(struct hashfile *f,
@@ -990,6 +1000,20 @@ static int write_graph_chunk_extra_edges(struct hashfile *f,
 	return 0;
 }
 
+static int write_graph_chunk_modified_path_bloom_index(struct hashfile *f,
+		struct write_commit_graph_context *ctx)
+{
+	const uint64_t no_bloom_filter = GRAPH_MODIFIED_PATH_BLOOM_FILTER_NONE;
+	int i;
+
+	hashwrite(f, &ctx->mpbfctx.num_hashes, sizeof(uint8_t));
+	for (i = 0; i < ctx->commits.nr; i++) {
+		display_progress(ctx->progress, ++ctx->progress_cnt);
+		hashwrite(f, &no_bloom_filter, sizeof(no_bloom_filter));
+	}
+	return 0;
+}
+
 static int oid_compare(const void *_a, const void *_b)
 {
 	const struct object_id *a = (const struct object_id *)_a;
@@ -1428,6 +1452,14 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 		chunks[chunks_nr].write_fn = write_graph_chunk_extra_edges;
 		chunks_nr++;
 	}
+	if (ctx->mpbfctx.use_modified_path_bloom_filters) {
+		ALLOC_GROW(chunks, chunks_nr + 1, chunks_alloc);
+		chunks[chunks_nr].id = GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_INDEX;
+		chunks[chunks_nr].size = sizeof(uint8_t) +
+					 ctx->commits.nr * sizeof(uint64_t);
+		chunks[chunks_nr].write_fn = write_graph_chunk_modified_path_bloom_index;
+		chunks_nr++;
+	}
 	if (ctx->num_commit_graphs_after > 1) {
 		ALLOC_GROW(chunks, chunks_nr + 1, chunks_alloc);
 		chunks[chunks_nr].id = GRAPH_CHUNKID_BASE;
@@ -1824,6 +1856,14 @@ int write_commit_graph(const char *obj_dir,
 	ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
 	ctx->split_opts = split_opts;
 
+	git_config_get_bool("core.modifiedPathBloomFilters", &res);
+	if (res && ctx->split) {
+		warning("not writing modified path Bloom filters with --split");
+	} else if (res) {
+		ctx->mpbfctx.use_modified_path_bloom_filters = 1;
+		ctx->mpbfctx.num_hashes = GRAPH_MODIFIED_PATH_BLOOM_FILTER_DEFAULT_NR_HASHES;
+	}
+
 	if (ctx->split) {
 		struct commit_graph *g;
 		prepare_commit_graph(ctx->r);
-- 
2.27.0.rc1.431.g5c813f95dc


  parent reply	other threads:[~2020-05-29  8:51 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-29  8:50 [PoC PATCH 00/34] An alternative modified path Bloom filters implementation SZEDER Gábor
2020-05-28 22:09 ` Johannes Schindelin
2020-05-29  8:50 ` [PATCH 01/34] tree-walk.c: don't match submodule entries for 'submod/anything' SZEDER Gábor
2020-05-29  8:50 ` [PATCH 02/34] commit-graph: fix parsing the Chunk Lookup table SZEDER Gábor
2020-05-29  8:50 ` [PATCH 03/34] commit-graph-format.txt: all multi-byte numbers are in network byte order SZEDER Gábor
2020-05-29  8:50 ` [PATCH 04/34] commit-slab: add a function to deep free entries on the slab SZEDER Gábor
2020-06-04 16:43   ` Derrick Stolee
2020-06-27 15:56     ` SZEDER Gábor
2020-06-29 11:30       ` Derrick Stolee
2020-05-29  8:50 ` [PATCH 05/34] diff.h: drop diff_tree_oid() & friends' return value SZEDER Gábor
2020-05-29  8:50 ` [PATCH 06/34] commit-graph: clean up #includes SZEDER Gábor
2020-05-29  8:50 ` [PATCH 07/34] commit-graph: simplify parse_commit_graph() #1 SZEDER Gábor
2020-05-29  8:50 ` [PATCH 08/34] commit-graph: simplify parse_commit_graph() #2 SZEDER Gábor
2020-05-29  8:50 ` [PATCH 09/34] commit-graph: simplify write_commit_graph_file() #1 SZEDER Gábor
2020-05-29  8:50 ` [PATCH 10/34] commit-graph: simplify write_commit_graph_file() #2 SZEDER Gábor
2020-05-29  8:50 ` [PATCH 11/34] commit-graph: allocate the 'struct chunk_info' array dinamically SZEDER Gábor
2020-05-29  8:50 ` [PATCH 12/34] commit-graph: unify the signatures of all write_graph_chunk_*() functions SZEDER Gábor
2020-05-29  8:50 ` [PATCH 13/34] commit-graph: simplify write_commit_graph_file() #3 SZEDER Gábor
2020-05-29  8:50 ` [PATCH 14/34] commit-graph: check chunk sizes after writing SZEDER Gábor
2020-05-29  8:50 ` [PATCH 15/34] commit-graph-format.txt: document the modified path Bloom filter chunks SZEDER Gábor
2020-06-02 17:59   ` SZEDER Gábor
2020-05-29  8:50 ` [PATCH 16/34] Add a generic and minimal Bloom filter implementation SZEDER Gábor
2020-05-29  8:50 ` [PATCH 17/34] Import a streaming-capable Murmur3 hash function implementation SZEDER Gábor
2020-05-29  8:50 ` SZEDER Gábor [this message]
2020-05-29  8:50 ` [PATCH 19/34] commit-graph: add commit slab for modified path Bloom filters SZEDER Gábor
2020-05-29  8:50 ` [PATCH 20/34] commit-graph: fill the Modified Path Bloom Filter Index chunk SZEDER Gábor
2020-05-29  8:50 ` [PATCH 21/34] commit-graph: load and use " SZEDER Gábor
2020-05-29  8:50 ` [PATCH 22/34] commit-graph: write the Modified Path Bloom Filters chunk SZEDER Gábor
2020-05-29  8:50 ` [PATCH 23/34] commit-graph: load and use " SZEDER Gábor
2020-05-29  8:50 ` [PATCH 24/34] commit-graph: check all leading directories in modified path Bloom filters SZEDER Gábor
2020-05-29  8:50 ` [PATCH 25/34] commit-graph: check embedded modified path Bloom filters with a mask SZEDER Gábor
2020-05-29  8:50 ` [PATCH 26/34] commit-graph: deduplicate modified path Bloom filters SZEDER Gábor
2020-05-29  8:50 ` [PATCH 27/34] commit-graph: load modified path Bloom filters for merge commits SZEDER Gábor
2020-05-29  8:50 ` [PATCH 28/34] commit-graph: write Modified Path Bloom Filter Merge Index chunk SZEDER Gábor
2020-05-29  8:50 ` [PATCH 29/34] commit-graph: extract init and free write_commit_graph_context SZEDER Gábor
2020-05-29  8:50 ` [PATCH 30/34] commit-graph: move write_commit_graph_reachable below write_commit_graph SZEDER Gábor
2020-05-29  8:50 ` [PATCH 31/34] t7007-show: make the first test compatible with the next patch SZEDER Gábor
2020-05-29  8:50 ` [PATCH 32/34] PoC commit-graph: use revision walk machinery for '--reachable' SZEDER Gábor
2020-05-29  8:50 ` [PATCH 33/34] commit-graph: write modified path Bloom filters in "history order" SZEDER Gábor
2020-05-29  8:50 ` [PATCH 34/34] commit-graph: use modified path Bloom filters with wildcards, if possible SZEDER Gábor
2020-05-29 13:59 ` [PoC PATCH 00/34] An alternative modified path Bloom filters implementation Derrick Stolee
2020-06-01 23:25 ` Taylor Blau
2020-06-02 17:08   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200529085038.26008-19-szeder.dev@gmail.com \
    --to=szeder.dev@gmail.com \
    --cc=garima.singh@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jnareb@gmail.com \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).