From: "Son Luong Ngoc via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Son Luong Ngoc <sluongng@gmail.com>, Son Luong Ngoc <sluongng@gmail.com>
Subject: [PATCH] commit-graph: add verify changed paths option
Date: Fri, 31 Jul 2020 07:49:25 +0000 [thread overview]
Message-ID: <pull.687.git.1596181765336.gitgitgadget@gmail.com> (raw)
From: Son Luong Ngoc <sluongng@gmail.com>
Add '--has-changed-paths' option to 'git commit-graph verify' subcommand
to validate whether the commit-graph was written with '--changed-paths'
option.
Signed-off-by: Son Luong Ngoc <sluongng@gmail.com>
---
Commit-Graph: Verify bloom filter
When I was working on git-care(1) and Gitaly(2), the need to check
whether a commit-graph (split or non-split) were built with Bloom
filter. This is needed especially when a repository primary commit-graph
write strategy is '--split' and the bottom chains might rarely be
re-written (or never) thus Bloom filter is never applied to the graph.
Provides users with a straight forward way to validate the existence of
Bloom filter chunks to save user having to read the commit-graph
manually as show in (1) and (2).
References:
1. https://github.com/sluongng/git-care/commit/d0feaa381ea3ec7b0e617c6596ad6e3cf16b884a
2. https://gitlab.com/sluongng/gitaly/-/commit/78dba8b73e720b11500482b19b755346ec853025
------------------------------------------------------------------------
It's probably going to take me a bit more time to write up some tests
for this, so I want to send it out first for comments.
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-687%2Fsluongng%2Fsluongngoc%2Fverify-bloom-filter-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-687/sluongng/sluongngoc/verify-bloom-filter-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/687
builtin/commit-graph.c | 12 +++++++++---
commit-graph.c | 22 +++++++++++++++++-----
commit-graph.h | 12 +++++++++---
3 files changed, 35 insertions(+), 11 deletions(-)
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 16c9f6101a..ce8a7cbe90 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -18,7 +18,8 @@ static char const * const builtin_commit_graph_usage[] = {
};
static const char * const builtin_commit_graph_verify_usage[] = {
- N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
+ N_("git commit-graph verify [--object-dir <objdir>] [--shallow] "
+ "[--has-changed-paths] [--[no-]progress]"),
NULL
};
@@ -37,6 +38,7 @@ static struct opts_commit_graph {
int append;
int split;
int shallow;
+ int has_changed_paths;
int progress;
int enable_changed_paths;
} opts;
@@ -71,12 +73,14 @@ static int graph_verify(int argc, const char **argv)
int open_ok;
int fd;
struct stat st;
- int flags = 0;
+ enum commit_graph_verify_flags flags = 0;
static struct option builtin_commit_graph_verify_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
N_("dir"),
N_("The object directory to store the graph")),
+ OPT_BOOL(0, "has-changed-paths", &opts.has_changed_paths,
+ N_("verify that the commit-graph includes changed paths")),
OPT_BOOL(0, "shallow", &opts.shallow,
N_("if the commit-graph is split, only verify the tip file")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
@@ -94,8 +98,10 @@ static int graph_verify(int argc, const char **argv)
opts.obj_dir = get_object_directory();
if (opts.shallow)
flags |= COMMIT_GRAPH_VERIFY_SHALLOW;
+ if (opts.has_changed_paths)
+ flags |= COMMIT_GRAPH_VERIFY_CHANGED_PATHS;
if (opts.progress)
- flags |= COMMIT_GRAPH_WRITE_PROGRESS;
+ flags |= COMMIT_GRAPH_VERIFY_PROGRESS;
odb = find_odb(the_repository, opts.obj_dir);
graph_name = get_commit_graph_filename(odb);
diff --git a/commit-graph.c b/commit-graph.c
index 1af68c297d..d83f5a2325 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -250,7 +250,7 @@ struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
return ret;
}
-static int verify_commit_graph_lite(struct commit_graph *g)
+static int verify_commit_graph_lite(struct commit_graph *g, int verify_changed_path)
{
/*
* Basic validation shared between parse_commit_graph()
@@ -276,6 +276,16 @@ static int verify_commit_graph_lite(struct commit_graph *g)
error("commit-graph is missing the Commit Data chunk");
return 1;
}
+ if (verify_changed_path) {
+ if (!g->chunk_bloom_indexes) {
+ error("commit-graph is missing Bloom Index chunk");
+ return 1;
+ }
+ if (!g->chunk_bloom_data) {
+ error("commit-graph is missing Bloom Data chunk");
+ return 1;
+ }
+ }
return 0;
}
@@ -439,7 +449,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len);
- if (verify_commit_graph_lite(graph))
+ if (verify_commit_graph_lite(graph, 0))
goto free_and_return;
return graph;
@@ -2216,7 +2226,9 @@ static void graph_report(const char *fmt, ...)
#define GENERATION_ZERO_EXISTS 1
#define GENERATION_NUMBER_EXISTS 2
-int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
+int verify_commit_graph(struct repository *r,
+ struct commit_graph *g,
+ enum commit_graph_verify_flags flags)
{
uint32_t i, cur_fanout_pos = 0;
struct object_id prev_oid, cur_oid, checksum;
@@ -2231,7 +2243,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
return 1;
}
- verify_commit_graph_error = verify_commit_graph_lite(g);
+ verify_commit_graph_error = verify_commit_graph_lite(g, flags & COMMIT_GRAPH_VERIFY_CHANGED_PATHS);
if (verify_commit_graph_error)
return verify_commit_graph_error;
@@ -2284,7 +2296,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
return verify_commit_graph_error;
- if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
+ if (flags & COMMIT_GRAPH_VERIFY_PROGRESS)
progress = start_progress(_("Verifying commits in commit graph"),
g->num_commits);
diff --git a/commit-graph.h b/commit-graph.h
index 28f89cdf3e..29c01b5000 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -94,6 +94,12 @@ enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
};
+enum commit_graph_verify_flags {
+ COMMIT_GRAPH_VERIFY_SHALLOW = (1 << 0),
+ COMMIT_GRAPH_VERIFY_CHANGED_PATHS = (1 << 1),
+ COMMIT_GRAPH_VERIFY_PROGRESS = (1 << 2),
+};
+
enum commit_graph_split_flags {
COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0,
COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
@@ -122,9 +128,9 @@ int write_commit_graph(struct object_directory *odb,
enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts);
-#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0)
-
-int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags);
+int verify_commit_graph(struct repository *r,
+ struct commit_graph *g,
+ enum commit_graph_verify_flags flags);
void close_commit_graph(struct raw_object_store *);
void free_commit_graph(struct commit_graph *);
base-commit: 47ae905ffb98cc4d4fd90083da6bc8dab55d9ecc
--
gitgitgadget
next reply other threads:[~2020-07-31 7:49 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-31 7:49 Son Luong Ngoc via GitGitGadget [this message]
2020-07-31 16:21 ` [PATCH] commit-graph: add verify changed paths option Christian Couder
2020-07-31 17:14 ` Junio C Hamano
2020-07-31 18:06 ` Taylor Blau
2020-07-31 18:02 ` Jeff King
2020-07-31 18:09 ` Taylor Blau
2020-07-31 19:14 ` Jeff King
2020-07-31 19:31 ` Son Luong Ngoc
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=pull.687.git.1596181765336.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=sluongng@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).