* [PATCH v2 1/4] treewide: rename tree to maybe_tree
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
@ 2018-04-06 19:09 ` Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 2/4] commit: create get_commit_tree() method Derrick Stolee
` (4 subsequent siblings)
5 siblings, 0 replies; 26+ messages in thread
From: Derrick Stolee @ 2018-04-06 19:09 UTC (permalink / raw)
To: git; +Cc: peff, sbeller, avarab, larsxschneider, gitster, Derrick Stolee
Using the commit-graph file to walk commit history removes the large
cost of parsing commits during the walk. This exposes a performance
issue: lookup_tree() takes a large portion of the computation time,
even when Git never uses those trees.
In anticipation of lazy-loading these trees, rename the 'tree' member
of struct commit to 'maybe_tree'. This serves two purposes: it hints
at the future role of possibly being NULL even if the commit has a
valid tree, and it allows for unambiguous transformation from simple
member access (i.e. commit->maybe_tree) to method access.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
blame.c | 18 +++++++++---------
builtin/checkout.c | 12 ++++++------
builtin/diff.c | 2 +-
builtin/fast-export.c | 6 +++---
builtin/log.c | 4 ++--
builtin/reflog.c | 2 +-
commit-graph.c | 4 ++--
commit.c | 2 +-
commit.h | 2 +-
fsck.c | 6 +++---
http-push.c | 2 +-
line-log.c | 4 ++--
list-objects.c | 10 +++++-----
log-tree.c | 6 +++---
merge-recursive.c | 5 +++--
notes-merge.c | 8 ++++----
packfile.c | 2 +-
pretty.c | 4 ++--
ref-filter.c | 2 +-
revision.c | 8 ++++----
sequencer.c | 12 ++++++------
sha1_name.c | 2 +-
tree.c | 4 ++--
walker.c | 2 +-
24 files changed, 65 insertions(+), 64 deletions(-)
diff --git a/blame.c b/blame.c
index 200e0ad9a2..b78e649cac 100644
--- a/blame.c
+++ b/blame.c
@@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit *parent,
diff_setup_done(&diff_opts);
if (is_null_oid(&origin->commit->object.oid))
- do_diff_cache(&parent->tree->object.oid, &diff_opts);
+ do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
- diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+ diff_tree_oid(&parent->maybe_tree->object.oid,
+ &origin->commit->maybe_tree->object.oid,
"", &diff_opts);
diffcore_std(&diff_opts);
@@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit *parent,
diff_setup_done(&diff_opts);
if (is_null_oid(&origin->commit->object.oid))
- do_diff_cache(&parent->tree->object.oid, &diff_opts);
+ do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
- diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+ diff_tree_oid(&parent->maybe_tree->object.oid,
+ &origin->commit->maybe_tree->object.oid,
"", &diff_opts);
diffcore_std(&diff_opts);
@@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard *sb,
diff_opts.flags.find_copies_harder = 1;
if (is_null_oid(&target->commit->object.oid))
- do_diff_cache(&parent->tree->object.oid, &diff_opts);
+ do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
- diff_tree_oid(&parent->tree->object.oid,
- &target->commit->tree->object.oid,
+ diff_tree_oid(&parent->maybe_tree->object.oid,
+ &target->commit->maybe_tree->object.oid,
"", &diff_opts);
if (!diff_opts.flags.find_copies_harder)
diff --git a/builtin/checkout.c b/builtin/checkout.c
index d76e13c852..b15fed5d85 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -484,7 +484,7 @@ static int merge_working_tree(const struct checkout_opts *opts,
resolve_undo_clear();
if (opts->force) {
- ret = reset_tree(new_branch_info->commit->tree, opts, 1, writeout_error);
+ ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1, writeout_error);
if (ret)
return ret;
} else {
@@ -570,18 +570,18 @@ static int merge_working_tree(const struct checkout_opts *opts,
o.verbosity = 0;
work = write_tree_from_memory(&o);
- ret = reset_tree(new_branch_info->commit->tree, opts, 1,
+ ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1,
writeout_error);
if (ret)
return ret;
o.ancestor = old_branch_info->name;
o.branch1 = new_branch_info->name;
o.branch2 = "local";
- ret = merge_trees(&o, new_branch_info->commit->tree, work,
- old_branch_info->commit->tree, &result);
+ ret = merge_trees(&o, new_branch_info->commit->maybe_tree, work,
+ old_branch_info->commit->maybe_tree, &result);
if (ret < 0)
exit(128);
- ret = reset_tree(new_branch_info->commit->tree, opts, 0,
+ ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 0,
writeout_error);
strbuf_release(&o.obuf);
if (ret)
@@ -1002,7 +1002,7 @@ static int parse_branchname_arg(int argc, const char **argv,
*source_tree = parse_tree_indirect(rev);
} else {
parse_commit_or_die(new_branch_info->commit);
- *source_tree = new_branch_info->commit->tree;
+ *source_tree = new_branch_info->commit->maybe_tree;
}
if (!*source_tree) /* case (1): want a tree */
diff --git a/builtin/diff.c b/builtin/diff.c
index 16bfb22f73..34f18a5f3f 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -398,7 +398,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
if (!obj)
die(_("invalid object '%s' given."), name);
if (obj->type == OBJ_COMMIT)
- obj = &((struct commit *)obj)->tree->object;
+ obj = &((struct commit *)obj)->maybe_tree->object;
if (obj->type == OBJ_TREE) {
obj->flags |= flags;
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 27b2cc138e..91e526b30d 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -578,11 +578,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
get_object_mark(&commit->parents->item->object) != 0 &&
!full_tree) {
parse_commit_or_die(commit->parents->item);
- diff_tree_oid(&commit->parents->item->tree->object.oid,
- &commit->tree->object.oid, "", &rev->diffopt);
+ diff_tree_oid(&commit->parents->item->maybe_tree->object.oid,
+ &commit->maybe_tree->object.oid, "", &rev->diffopt);
}
else
- diff_root_tree_oid(&commit->tree->object.oid,
+ diff_root_tree_oid(&commit->maybe_tree->object.oid,
"", &rev->diffopt);
/* Export the referenced blobs, and remember the marks. */
diff --git a/builtin/log.c b/builtin/log.c
index 94ee177d56..f603b53318 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1067,8 +1067,8 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout,
diff_setup_done(&opts);
- diff_tree_oid(&origin->tree->object.oid,
- &head->tree->object.oid,
+ diff_tree_oid(&origin->maybe_tree->object.oid,
+ &head->maybe_tree->object.oid,
"", &opts);
diffcore_std(&opts);
diff_flush(&opts);
diff --git a/builtin/reflog.c b/builtin/reflog.c
index 4719a5354c..b17eabc009 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -154,7 +154,7 @@ static int commit_is_complete(struct commit *commit)
for (i = 0; i < found.nr; i++) {
struct commit *c =
(struct commit *)found.objects[i].item;
- if (!tree_is_complete(&c->tree->object.oid)) {
+ if (!tree_is_complete(&c->maybe_tree->object.oid)) {
is_incomplete = 1;
c->object.flags |= INCOMPLETE;
}
diff --git a/commit-graph.c b/commit-graph.c
index 1fc63d541b..005b4a753e 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -258,7 +258,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
item->graph_pos = pos;
hashcpy(oid.hash, commit_data);
- item->tree = lookup_tree(&oid);
+ item->maybe_tree = lookup_tree(&oid);
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
@@ -369,7 +369,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
uint32_t packedDate[2];
parse_commit(*list);
- hashwrite(f, (*list)->tree->object.oid.hash, hash_len);
+ hashwrite(f, (*list)->maybe_tree->object.oid.hash, hash_len);
parent = (*list)->parents;
diff --git a/commit.c b/commit.c
index 3e39c86abf..fbc092808c 100644
--- a/commit.c
+++ b/commit.c
@@ -335,7 +335,7 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
if (get_sha1_hex(bufptr + 5, parent.hash) < 0)
return error("bad tree pointer in commit %s",
oid_to_hex(&item->object.oid));
- item->tree = lookup_tree(&parent);
+ item->maybe_tree = lookup_tree(&parent);
bufptr += tree_entry_len + 1; /* "tree " + "hex sha1" + "\n" */
pptr = &item->parents;
diff --git a/commit.h b/commit.h
index e57ae4b583..c4d6e6e064 100644
--- a/commit.h
+++ b/commit.h
@@ -22,7 +22,7 @@ struct commit {
unsigned int index;
timestamp_t date;
struct commit_list *parents;
- struct tree *tree;
+ struct tree *maybe_tree;
uint32_t graph_pos;
};
diff --git a/fsck.c b/fsck.c
index 5c8c12dde3..3228ca5bee 100644
--- a/fsck.c
+++ b/fsck.c
@@ -396,9 +396,9 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
name = get_object_name(options, &commit->object);
if (name)
- put_object_name(options, &commit->tree->object, "%s:", name);
+ put_object_name(options, &commit->maybe_tree->object, "%s:", name);
- result = options->walk((struct object *)commit->tree, OBJ_TREE, data, options);
+ result = options->walk((struct object *)commit->maybe_tree, OBJ_TREE, data, options);
if (result < 0)
return result;
res = result;
@@ -772,7 +772,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer,
err = fsck_ident(&buffer, &commit->object, options);
if (err)
return err;
- if (!commit->tree) {
+ if (!commit->maybe_tree) {
err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
if (err)
return err;
diff --git a/http-push.c b/http-push.c
index 7dcd9daf62..d83479f32f 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1330,7 +1330,7 @@ static int get_delta(struct rev_info *revs, struct remote_lock *lock)
int count = 0;
while ((commit = get_revision(revs)) != NULL) {
- p = process_tree(commit->tree, p);
+ p = process_tree(commit->maybe_tree, p);
commit->object.flags |= LOCAL;
if (!(commit->object.flags & UNINTERESTING))
count += add_send_request(&commit->object, lock);
diff --git a/line-log.c b/line-log.c
index cdc2257db5..e714969ca2 100644
--- a/line-log.c
+++ b/line-log.c
@@ -817,8 +817,8 @@ static void queue_diffs(struct line_log_data *range,
assert(commit);
DIFF_QUEUE_CLEAR(&diff_queued_diff);
- diff_tree_oid(parent ? &parent->tree->object.oid : NULL,
- &commit->tree->object.oid, "", opt);
+ diff_tree_oid(parent ? &parent->maybe_tree->object.oid : NULL,
+ &commit->maybe_tree->object.oid, "", opt);
if (opt->detect_rename) {
filter_diffs_for_paths(range, 1);
if (diff_might_be_rename())
diff --git a/list-objects.c b/list-objects.c
index 168bef688a..bfd09f545c 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -195,7 +195,7 @@ static void mark_edge_parents_uninteresting(struct commit *commit,
struct commit *parent = parents->item;
if (!(parent->object.flags & UNINTERESTING))
continue;
- mark_tree_uninteresting(parent->tree);
+ mark_tree_uninteresting(parent->maybe_tree);
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
parent->object.flags |= SHOWN;
show_edge(parent);
@@ -212,7 +212,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
struct commit *commit = list->item;
if (commit->object.flags & UNINTERESTING) {
- mark_tree_uninteresting(commit->tree);
+ mark_tree_uninteresting(commit->maybe_tree);
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
commit->object.flags |= SHOWN;
show_edge(commit);
@@ -227,7 +227,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
struct commit *commit = (struct commit *)obj;
if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING))
continue;
- mark_tree_uninteresting(commit->tree);
+ mark_tree_uninteresting(commit->maybe_tree);
if (!(obj->flags & SHOWN)) {
obj->flags |= SHOWN;
show_edge(commit);
@@ -300,8 +300,8 @@ static void do_traverse(struct rev_info *revs,
* an uninteresting boundary commit may not have its tree
* parsed yet, but we are not going to show them anyway
*/
- if (commit->tree)
- add_pending_tree(revs, commit->tree);
+ if (commit->maybe_tree)
+ add_pending_tree(revs, commit->maybe_tree);
show_commit(commit, show_data);
if (revs->tree_blobs_in_commit_order)
diff --git a/log-tree.c b/log-tree.c
index bdf23c5f7b..99499af57c 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -806,7 +806,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
return 0;
parse_commit_or_die(commit);
- oid = &commit->tree->object.oid;
+ oid = &commit->maybe_tree->object.oid;
/* Root commit? */
parents = get_saved_parents(opt, commit);
@@ -831,7 +831,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
* we merged _in_.
*/
parse_commit_or_die(parents->item);
- diff_tree_oid(&parents->item->tree->object.oid,
+ diff_tree_oid(&parents->item->maybe_tree->object.oid,
oid, "", &opt->diffopt);
log_tree_diff_flush(opt);
return !opt->loginfo;
@@ -846,7 +846,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
struct commit *parent = parents->item;
parse_commit_or_die(parent);
- diff_tree_oid(&parent->tree->object.oid,
+ diff_tree_oid(&parent->maybe_tree->object.oid,
oid, "", &opt->diffopt);
log_tree_diff_flush(opt);
diff --git a/merge-recursive.c b/merge-recursive.c
index 869092f7b9..67886a4ff8 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -101,7 +101,7 @@ static struct commit *make_virtual_commit(struct tree *tree, const char *comment
struct commit *commit = alloc_commit_node();
set_merge_remote_desc(commit, comment, (struct object *)commit);
- commit->tree = tree;
+ commit->maybe_tree = tree;
commit->object.parsed = 1;
return commit;
}
@@ -2154,7 +2154,8 @@ int merge_recursive(struct merge_options *o,
read_cache();
o->ancestor = "merged common ancestors";
- clean = merge_trees(o, h1->tree, h2->tree, merged_common_ancestors->tree,
+ clean = merge_trees(o, h1->maybe_tree, h2->maybe_tree,
+ merged_common_ancestors->maybe_tree,
&mrtree);
if (clean < 0) {
flush_output(o);
diff --git a/notes-merge.c b/notes-merge.c
index c09c5e0e47..1d3edc8942 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -600,14 +600,14 @@ int notes_merge(struct notes_merge_options *o,
printf("No merge base found; doing history-less merge\n");
} else if (!bases->next) {
base_oid = &bases->item->object.oid;
- base_tree_oid = &bases->item->tree->object.oid;
+ base_tree_oid = &bases->item->maybe_tree->object.oid;
if (o->verbosity >= 4)
printf("One merge base found (%.7s)\n",
oid_to_hex(base_oid));
} else {
/* TODO: How to handle multiple merge-bases? */
base_oid = &bases->item->object.oid;
- base_tree_oid = &bases->item->tree->object.oid;
+ base_tree_oid = &bases->item->maybe_tree->object.oid;
if (o->verbosity >= 3)
printf("Multiple merge bases found. Using the first "
"(%.7s)\n", oid_to_hex(base_oid));
@@ -634,8 +634,8 @@ int notes_merge(struct notes_merge_options *o,
goto found_result;
}
- result = merge_from_diffs(o, base_tree_oid, &local->tree->object.oid,
- &remote->tree->object.oid, local_tree);
+ result = merge_from_diffs(o, base_tree_oid, &local->maybe_tree->object.oid,
+ &remote->maybe_tree->object.oid, local_tree);
if (result != 0) { /* non-trivial merge (with or without conflicts) */
/* Commit (partial) result */
diff --git a/packfile.c b/packfile.c
index b1d33b646a..3eb9c4a36e 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1925,7 +1925,7 @@ static int add_promisor_object(const struct object_id *oid,
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, &commit->tree->object.oid);
+ oidset_insert(set, &commit->maybe_tree->object.oid);
for (; parents; parents = parents->next)
oidset_insert(set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
diff --git a/pretty.c b/pretty.c
index f7ce490230..42095ea495 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1161,10 +1161,10 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1;
case 'T': /* tree hash */
- strbuf_addstr(sb, oid_to_hex(&commit->tree->object.oid));
+ strbuf_addstr(sb, oid_to_hex(&commit->maybe_tree->object.oid));
return 1;
case 't': /* abbreviated tree hash */
- strbuf_add_unique_abbrev(sb, commit->tree->object.oid.hash,
+ strbuf_add_unique_abbrev(sb, commit->maybe_tree->object.oid.hash,
c->pretty_ctx->abbrev);
return 1;
case 'P': /* parent hashes */
diff --git a/ref-filter.c b/ref-filter.c
index 45fc56216a..783a3ee6cf 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -815,7 +815,7 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
if (deref)
name++;
if (!strcmp(name, "tree")) {
- v->s = xstrdup(oid_to_hex(&commit->tree->object.oid));
+ v->s = xstrdup(oid_to_hex(&commit->maybe_tree->object.oid));
}
else if (!strcmp(name, "numparent")) {
v->value = commit_list_count(commit->parents);
diff --git a/revision.c b/revision.c
index b42c836d7a..7d66e32e83 100644
--- a/revision.c
+++ b/revision.c
@@ -440,8 +440,8 @@ static void file_change(struct diff_options *options,
static int rev_compare_tree(struct rev_info *revs,
struct commit *parent, struct commit *commit)
{
- struct tree *t1 = parent->tree;
- struct tree *t2 = commit->tree;
+ struct tree *t1 = parent->maybe_tree;
+ struct tree *t2 = commit->maybe_tree;
if (!t1)
return REV_TREE_NEW;
@@ -477,7 +477,7 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
int retval;
- struct tree *t1 = commit->tree;
+ struct tree *t1 = commit->maybe_tree;
if (!t1)
return 0;
@@ -615,7 +615,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
if (!revs->prune)
return;
- if (!commit->tree)
+ if (!commit->maybe_tree)
return;
if (!commit->parents) {
diff --git a/sequencer.c b/sequencer.c
index f9d1001dee..3b2823c8b5 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -499,8 +499,8 @@ static int do_recursive_merge(struct commit *base, struct commit *next,
o.show_rename_progress = 1;
head_tree = parse_tree_indirect(head);
- next_tree = next ? next->tree : empty_tree();
- base_tree = base ? base->tree : empty_tree();
+ next_tree = next ? next->maybe_tree : empty_tree();
+ base_tree = base ? base->maybe_tree : empty_tree();
for (xopt = opts->xopts; xopt != opts->xopts + opts->xopts_nr; xopt++)
parse_merge_opt(&o, *xopt);
@@ -561,7 +561,7 @@ static int is_index_unchanged(void)
return error(_("unable to update cache tree"));
return !oidcmp(&active_cache_tree->oid,
- &head_commit->tree->object.oid);
+ &head_commit->maybe_tree->object.oid);
}
static int write_author_script(const char *message)
@@ -1118,7 +1118,7 @@ static int try_to_commit(struct strbuf *msg, const char *author,
}
if (!(flags & ALLOW_EMPTY) && !oidcmp(current_head ?
- ¤t_head->tree->object.oid :
+ ¤t_head->maybe_tree->object.oid :
&empty_tree_oid, &tree)) {
res = 1; /* run 'git commit' to display error message */
goto out;
@@ -1216,12 +1216,12 @@ static int is_original_commit_empty(struct commit *commit)
if (parse_commit(parent))
return error(_("could not parse parent commit %s"),
oid_to_hex(&parent->object.oid));
- ptree_oid = &parent->tree->object.oid;
+ ptree_oid = &parent->maybe_tree->object.oid;
} else {
ptree_oid = the_hash_algo->empty_tree; /* commit is root */
}
- return !oidcmp(ptree_oid, &commit->tree->object.oid);
+ return !oidcmp(ptree_oid, &commit->maybe_tree->object.oid);
}
/*
diff --git a/sha1_name.c b/sha1_name.c
index 735c1c0b8e..26b22cb628 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -896,7 +896,7 @@ struct object *peel_to_type(const char *name, int namelen,
if (o->type == OBJ_TAG)
o = ((struct tag*) o)->tagged;
else if (o->type == OBJ_COMMIT)
- o = &(((struct commit *) o)->tree->object);
+ o = &(((struct commit *) o)->maybe_tree->object);
else {
if (name)
error("%.*s: expected %s type, but the object "
diff --git a/tree.c b/tree.c
index b224115e0f..dbc5e0be54 100644
--- a/tree.c
+++ b/tree.c
@@ -109,7 +109,7 @@ static int read_tree_1(struct tree *tree, struct strbuf *base,
oid_to_hex(entry.oid),
base->buf, entry.path);
- oidcpy(&oid, &commit->tree->object.oid);
+ oidcpy(&oid, &commit->maybe_tree->object.oid);
}
else
continue;
@@ -248,7 +248,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid)
if (obj->type == OBJ_TREE)
return (struct tree *) obj;
else if (obj->type == OBJ_COMMIT)
- obj = &(((struct commit *) obj)->tree->object);
+ obj = &(((struct commit *) obj)->maybe_tree->object);
else if (obj->type == OBJ_TAG)
obj = ((struct tag *) obj)->tagged;
else
diff --git a/walker.c b/walker.c
index dffb9c8e37..1d5f3059a2 100644
--- a/walker.c
+++ b/walker.c
@@ -87,7 +87,7 @@ static int process_commit(struct walker *walker, struct commit *commit)
walker_say(walker, "walk %s\n", oid_to_hex(&commit->object.oid));
if (walker->get_tree) {
- if (process(walker, &commit->tree->object))
+ if (process(walker, &commit->maybe_tree->object))
return -1;
if (!walker->get_all)
walker->get_tree = 0;
--
2.17.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH v2 2/4] commit: create get_commit_tree() method
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 1/4] treewide: rename tree to maybe_tree Derrick Stolee
@ 2018-04-06 19:09 ` Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 3/4] treewide: replace maybe_tree with accessor methods Derrick Stolee
` (3 subsequent siblings)
5 siblings, 0 replies; 26+ messages in thread
From: Derrick Stolee @ 2018-04-06 19:09 UTC (permalink / raw)
To: git; +Cc: peff, sbeller, avarab, larsxschneider, gitster, Derrick Stolee
While walking the commit graph, we load struct commit objects into
the object cache. During this process, we also load struct tree
objects for the root tree of each of these commits. We load these
objects even if we are only computing commit reachability information,
such as a merge base or ahead/behind information.
Create get_commit_tree() as a first step to removing direct
references to the 'maybe_tree' member of struct commit.
Create get_commit_tree_oid() as a shortcut for several references
to "&commit->maybe_tree->object.oid" in the codebase.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
commit.c | 10 ++++++++++
commit.h | 3 +++
2 files changed, 13 insertions(+)
diff --git a/commit.c b/commit.c
index fbc092808c..aea2ca1f8b 100644
--- a/commit.c
+++ b/commit.c
@@ -296,6 +296,16 @@ void free_commit_buffer(struct commit *commit)
}
}
+struct tree *get_commit_tree(const struct commit *commit)
+{
+ return commit->maybe_tree;
+}
+
+struct object_id *get_commit_tree_oid(const struct commit *commit)
+{
+ return &get_commit_tree(commit)->object.oid;
+}
+
const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
{
struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
diff --git a/commit.h b/commit.h
index c4d6e6e064..dc4bf97d9f 100644
--- a/commit.h
+++ b/commit.h
@@ -102,6 +102,9 @@ void unuse_commit_buffer(const struct commit *, const void *buffer);
*/
void free_commit_buffer(struct commit *);
+struct tree *get_commit_tree(const struct commit *);
+struct object_id *get_commit_tree_oid(const struct commit *);
+
/*
* Disassociate any cached object buffer from the commit, but do not free it.
* The buffer (or NULL, if none) is returned.
--
2.17.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH v2 3/4] treewide: replace maybe_tree with accessor methods
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 1/4] treewide: rename tree to maybe_tree Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 2/4] commit: create get_commit_tree() method Derrick Stolee
@ 2018-04-06 19:09 ` Derrick Stolee
2018-04-06 19:09 ` [PATCH v2 4/4] commit-graph: lazy-load trees for commits Derrick Stolee
` (2 subsequent siblings)
5 siblings, 0 replies; 26+ messages in thread
From: Derrick Stolee @ 2018-04-06 19:09 UTC (permalink / raw)
To: git; +Cc: peff, sbeller, avarab, larsxschneider, gitster, Derrick Stolee
In anticipation of making trees load lazily, create a Coccinelle
script (contrib/coccinelle/commit.cocci) to ensure that all
references to the 'maybe_tree' member of struct commit are either
mutations or accesses through get_commit_tree() or
get_commit_tree_oid().
Apply the Coccinelle script to create the rest of the patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
blame.c | 18 +++++++++---------
builtin/checkout.c | 18 ++++++++++++------
builtin/diff.c | 2 +-
builtin/fast-export.c | 6 +++---
builtin/log.c | 4 ++--
builtin/reflog.c | 2 +-
commit-graph.c | 2 +-
contrib/coccinelle/commit.cocci | 30 ++++++++++++++++++++++++++++++
fsck.c | 8 +++++---
http-push.c | 2 +-
line-log.c | 4 ++--
list-objects.c | 10 +++++-----
log-tree.c | 6 +++---
merge-recursive.c | 4 ++--
notes-merge.c | 9 +++++----
packfile.c | 2 +-
pretty.c | 5 +++--
ref-filter.c | 2 +-
revision.c | 8 ++++----
sequencer.c | 12 ++++++------
sha1_name.c | 2 +-
tree.c | 4 ++--
walker.c | 2 +-
23 files changed, 101 insertions(+), 61 deletions(-)
create mode 100644 contrib/coccinelle/commit.cocci
diff --git a/blame.c b/blame.c
index b78e649cac..7f5700b324 100644
--- a/blame.c
+++ b/blame.c
@@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit *parent,
diff_setup_done(&diff_opts);
if (is_null_oid(&origin->commit->object.oid))
- do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
+ do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
- diff_tree_oid(&parent->maybe_tree->object.oid,
- &origin->commit->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
"", &diff_opts);
diffcore_std(&diff_opts);
@@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit *parent,
diff_setup_done(&diff_opts);
if (is_null_oid(&origin->commit->object.oid))
- do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
+ do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
- diff_tree_oid(&parent->maybe_tree->object.oid,
- &origin->commit->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
"", &diff_opts);
diffcore_std(&diff_opts);
@@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard *sb,
diff_opts.flags.find_copies_harder = 1;
if (is_null_oid(&target->commit->object.oid))
- do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
+ do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
- diff_tree_oid(&parent->maybe_tree->object.oid,
- &target->commit->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(target->commit),
"", &diff_opts);
if (!diff_opts.flags.find_copies_harder)
diff --git a/builtin/checkout.c b/builtin/checkout.c
index b15fed5d85..3c8b2d0c27 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -484,7 +484,8 @@ static int merge_working_tree(const struct checkout_opts *opts,
resolve_undo_clear();
if (opts->force) {
- ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1, writeout_error);
+ ret = reset_tree(get_commit_tree(new_branch_info->commit),
+ opts, 1, writeout_error);
if (ret)
return ret;
} else {
@@ -570,18 +571,23 @@ static int merge_working_tree(const struct checkout_opts *opts,
o.verbosity = 0;
work = write_tree_from_memory(&o);
- ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1,
+ ret = reset_tree(get_commit_tree(new_branch_info->commit),
+ opts, 1,
writeout_error);
if (ret)
return ret;
o.ancestor = old_branch_info->name;
o.branch1 = new_branch_info->name;
o.branch2 = "local";
- ret = merge_trees(&o, new_branch_info->commit->maybe_tree, work,
- old_branch_info->commit->maybe_tree, &result);
+ ret = merge_trees(&o,
+ get_commit_tree(new_branch_info->commit),
+ work,
+ get_commit_tree(old_branch_info->commit),
+ &result);
if (ret < 0)
exit(128);
- ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 0,
+ ret = reset_tree(get_commit_tree(new_branch_info->commit),
+ opts, 0,
writeout_error);
strbuf_release(&o.obuf);
if (ret)
@@ -1002,7 +1008,7 @@ static int parse_branchname_arg(int argc, const char **argv,
*source_tree = parse_tree_indirect(rev);
} else {
parse_commit_or_die(new_branch_info->commit);
- *source_tree = new_branch_info->commit->maybe_tree;
+ *source_tree = get_commit_tree(new_branch_info->commit);
}
if (!*source_tree) /* case (1): want a tree */
diff --git a/builtin/diff.c b/builtin/diff.c
index 34f18a5f3f..bfefff3a84 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -398,7 +398,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
if (!obj)
die(_("invalid object '%s' given."), name);
if (obj->type == OBJ_COMMIT)
- obj = &((struct commit *)obj)->maybe_tree->object;
+ obj = &get_commit_tree(((struct commit *)obj))->object;
if (obj->type == OBJ_TREE) {
obj->flags |= flags;
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 91e526b30d..c1304234cd 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -578,11 +578,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
get_object_mark(&commit->parents->item->object) != 0 &&
!full_tree) {
parse_commit_or_die(commit->parents->item);
- diff_tree_oid(&commit->parents->item->maybe_tree->object.oid,
- &commit->maybe_tree->object.oid, "", &rev->diffopt);
+ diff_tree_oid(get_commit_tree_oid(commit->parents->item),
+ get_commit_tree_oid(commit), "", &rev->diffopt);
}
else
- diff_root_tree_oid(&commit->maybe_tree->object.oid,
+ diff_root_tree_oid(get_commit_tree_oid(commit),
"", &rev->diffopt);
/* Export the referenced blobs, and remember the marks. */
diff --git a/builtin/log.c b/builtin/log.c
index f603b53318..25b4cb3347 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1067,8 +1067,8 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout,
diff_setup_done(&opts);
- diff_tree_oid(&origin->maybe_tree->object.oid,
- &head->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(origin),
+ get_commit_tree_oid(head),
"", &opts);
diffcore_std(&opts);
diff_flush(&opts);
diff --git a/builtin/reflog.c b/builtin/reflog.c
index b17eabc009..88d0c8378c 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -154,7 +154,7 @@ static int commit_is_complete(struct commit *commit)
for (i = 0; i < found.nr; i++) {
struct commit *c =
(struct commit *)found.objects[i].item;
- if (!tree_is_complete(&c->maybe_tree->object.oid)) {
+ if (!tree_is_complete(get_commit_tree_oid(c))) {
is_incomplete = 1;
c->object.flags |= INCOMPLETE;
}
diff --git a/commit-graph.c b/commit-graph.c
index 005b4a753e..9f37d84209 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -369,7 +369,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
uint32_t packedDate[2];
parse_commit(*list);
- hashwrite(f, (*list)->maybe_tree->object.oid.hash, hash_len);
+ hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
parent = (*list)->parents;
diff --git a/contrib/coccinelle/commit.cocci b/contrib/coccinelle/commit.cocci
new file mode 100644
index 0000000000..ac38525941
--- /dev/null
+++ b/contrib/coccinelle/commit.cocci
@@ -0,0 +1,30 @@
+@@
+expression c;
+@@
+- &c->maybe_tree->object.oid
++ get_commit_tree_oid(c)
+
+@@
+expression c;
+@@
+- c->maybe_tree->object.oid.hash
++ get_commit_tree_oid(c)->hash
+
+@@
+expression c;
+@@
+- c->maybe_tree
++ get_commit_tree(c)
+
+@@
+expression c;
+expression s;
+@@
+- get_commit_tree(c) = s
++ c->maybe_tree = s
+
+@@
+expression c;
+@@
+- return get_commit_tree(c);
++ return c->maybe_tree;
diff --git a/fsck.c b/fsck.c
index 3228ca5bee..695fd71b13 100644
--- a/fsck.c
+++ b/fsck.c
@@ -396,9 +396,11 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
name = get_object_name(options, &commit->object);
if (name)
- put_object_name(options, &commit->maybe_tree->object, "%s:", name);
+ put_object_name(options, &get_commit_tree(commit)->object,
+ "%s:", name);
- result = options->walk((struct object *)commit->maybe_tree, OBJ_TREE, data, options);
+ result = options->walk((struct object *)get_commit_tree(commit),
+ OBJ_TREE, data, options);
if (result < 0)
return result;
res = result;
@@ -772,7 +774,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer,
err = fsck_ident(&buffer, &commit->object, options);
if (err)
return err;
- if (!commit->maybe_tree) {
+ if (!get_commit_tree(commit)) {
err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
if (err)
return err;
diff --git a/http-push.c b/http-push.c
index d83479f32f..53a217291e 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1330,7 +1330,7 @@ static int get_delta(struct rev_info *revs, struct remote_lock *lock)
int count = 0;
while ((commit = get_revision(revs)) != NULL) {
- p = process_tree(commit->maybe_tree, p);
+ p = process_tree(get_commit_tree(commit), p);
commit->object.flags |= LOCAL;
if (!(commit->object.flags & UNINTERESTING))
count += add_send_request(&commit->object, lock);
diff --git a/line-log.c b/line-log.c
index e714969ca2..437d44c00a 100644
--- a/line-log.c
+++ b/line-log.c
@@ -817,8 +817,8 @@ static void queue_diffs(struct line_log_data *range,
assert(commit);
DIFF_QUEUE_CLEAR(&diff_queued_diff);
- diff_tree_oid(parent ? &parent->maybe_tree->object.oid : NULL,
- &commit->maybe_tree->object.oid, "", opt);
+ diff_tree_oid(parent ? get_commit_tree_oid(parent) : NULL,
+ get_commit_tree_oid(commit), "", opt);
if (opt->detect_rename) {
filter_diffs_for_paths(range, 1);
if (diff_might_be_rename())
diff --git a/list-objects.c b/list-objects.c
index bfd09f545c..3eec510357 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -195,7 +195,7 @@ static void mark_edge_parents_uninteresting(struct commit *commit,
struct commit *parent = parents->item;
if (!(parent->object.flags & UNINTERESTING))
continue;
- mark_tree_uninteresting(parent->maybe_tree);
+ mark_tree_uninteresting(get_commit_tree(parent));
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
parent->object.flags |= SHOWN;
show_edge(parent);
@@ -212,7 +212,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
struct commit *commit = list->item;
if (commit->object.flags & UNINTERESTING) {
- mark_tree_uninteresting(commit->maybe_tree);
+ mark_tree_uninteresting(get_commit_tree(commit));
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
commit->object.flags |= SHOWN;
show_edge(commit);
@@ -227,7 +227,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
struct commit *commit = (struct commit *)obj;
if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING))
continue;
- mark_tree_uninteresting(commit->maybe_tree);
+ mark_tree_uninteresting(get_commit_tree(commit));
if (!(obj->flags & SHOWN)) {
obj->flags |= SHOWN;
show_edge(commit);
@@ -300,8 +300,8 @@ static void do_traverse(struct rev_info *revs,
* an uninteresting boundary commit may not have its tree
* parsed yet, but we are not going to show them anyway
*/
- if (commit->maybe_tree)
- add_pending_tree(revs, commit->maybe_tree);
+ if (get_commit_tree(commit))
+ add_pending_tree(revs, get_commit_tree(commit));
show_commit(commit, show_data);
if (revs->tree_blobs_in_commit_order)
diff --git a/log-tree.c b/log-tree.c
index 99499af57c..c106bd70df 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -806,7 +806,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
return 0;
parse_commit_or_die(commit);
- oid = &commit->maybe_tree->object.oid;
+ oid = get_commit_tree_oid(commit);
/* Root commit? */
parents = get_saved_parents(opt, commit);
@@ -831,7 +831,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
* we merged _in_.
*/
parse_commit_or_die(parents->item);
- diff_tree_oid(&parents->item->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(parents->item),
oid, "", &opt->diffopt);
log_tree_diff_flush(opt);
return !opt->loginfo;
@@ -846,7 +846,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
struct commit *parent = parents->item;
parse_commit_or_die(parent);
- diff_tree_oid(&parent->maybe_tree->object.oid,
+ diff_tree_oid(get_commit_tree_oid(parent),
oid, "", &opt->diffopt);
log_tree_diff_flush(opt);
diff --git a/merge-recursive.c b/merge-recursive.c
index 67886a4ff8..a7e1938b8a 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -2154,8 +2154,8 @@ int merge_recursive(struct merge_options *o,
read_cache();
o->ancestor = "merged common ancestors";
- clean = merge_trees(o, h1->maybe_tree, h2->maybe_tree,
- merged_common_ancestors->maybe_tree,
+ clean = merge_trees(o, get_commit_tree(h1), get_commit_tree(h2),
+ get_commit_tree(merged_common_ancestors),
&mrtree);
if (clean < 0) {
flush_output(o);
diff --git a/notes-merge.c b/notes-merge.c
index 1d3edc8942..4a73a2169b 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -600,14 +600,14 @@ int notes_merge(struct notes_merge_options *o,
printf("No merge base found; doing history-less merge\n");
} else if (!bases->next) {
base_oid = &bases->item->object.oid;
- base_tree_oid = &bases->item->maybe_tree->object.oid;
+ base_tree_oid = get_commit_tree_oid(bases->item);
if (o->verbosity >= 4)
printf("One merge base found (%.7s)\n",
oid_to_hex(base_oid));
} else {
/* TODO: How to handle multiple merge-bases? */
base_oid = &bases->item->object.oid;
- base_tree_oid = &bases->item->maybe_tree->object.oid;
+ base_tree_oid = get_commit_tree_oid(bases->item);
if (o->verbosity >= 3)
printf("Multiple merge bases found. Using the first "
"(%.7s)\n", oid_to_hex(base_oid));
@@ -634,8 +634,9 @@ int notes_merge(struct notes_merge_options *o,
goto found_result;
}
- result = merge_from_diffs(o, base_tree_oid, &local->maybe_tree->object.oid,
- &remote->maybe_tree->object.oid, local_tree);
+ result = merge_from_diffs(o, base_tree_oid,
+ get_commit_tree_oid(local),
+ get_commit_tree_oid(remote), local_tree);
if (result != 0) { /* non-trivial merge (with or without conflicts) */
/* Commit (partial) result */
diff --git a/packfile.c b/packfile.c
index 3eb9c4a36e..88ba819151 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1925,7 +1925,7 @@ static int add_promisor_object(const struct object_id *oid,
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, &commit->maybe_tree->object.oid);
+ oidset_insert(set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
oidset_insert(set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
diff --git a/pretty.c b/pretty.c
index 42095ea495..15997f5e01 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1161,10 +1161,11 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
return 1;
case 'T': /* tree hash */
- strbuf_addstr(sb, oid_to_hex(&commit->maybe_tree->object.oid));
+ strbuf_addstr(sb, oid_to_hex(get_commit_tree_oid(commit)));
return 1;
case 't': /* abbreviated tree hash */
- strbuf_add_unique_abbrev(sb, commit->maybe_tree->object.oid.hash,
+ strbuf_add_unique_abbrev(sb,
+ get_commit_tree_oid(commit)->hash,
c->pretty_ctx->abbrev);
return 1;
case 'P': /* parent hashes */
diff --git a/ref-filter.c b/ref-filter.c
index 783a3ee6cf..e201bf60c6 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -815,7 +815,7 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object
if (deref)
name++;
if (!strcmp(name, "tree")) {
- v->s = xstrdup(oid_to_hex(&commit->maybe_tree->object.oid));
+ v->s = xstrdup(oid_to_hex(get_commit_tree_oid(commit)));
}
else if (!strcmp(name, "numparent")) {
v->value = commit_list_count(commit->parents);
diff --git a/revision.c b/revision.c
index 7d66e32e83..496db63b4b 100644
--- a/revision.c
+++ b/revision.c
@@ -440,8 +440,8 @@ static void file_change(struct diff_options *options,
static int rev_compare_tree(struct rev_info *revs,
struct commit *parent, struct commit *commit)
{
- struct tree *t1 = parent->maybe_tree;
- struct tree *t2 = commit->maybe_tree;
+ struct tree *t1 = get_commit_tree(parent);
+ struct tree *t2 = get_commit_tree(commit);
if (!t1)
return REV_TREE_NEW;
@@ -477,7 +477,7 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
int retval;
- struct tree *t1 = commit->maybe_tree;
+ struct tree *t1 = get_commit_tree(commit);
if (!t1)
return 0;
@@ -615,7 +615,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
if (!revs->prune)
return;
- if (!commit->maybe_tree)
+ if (!get_commit_tree(commit))
return;
if (!commit->parents) {
diff --git a/sequencer.c b/sequencer.c
index 3b2823c8b5..a5798b16d3 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -499,8 +499,8 @@ static int do_recursive_merge(struct commit *base, struct commit *next,
o.show_rename_progress = 1;
head_tree = parse_tree_indirect(head);
- next_tree = next ? next->maybe_tree : empty_tree();
- base_tree = base ? base->maybe_tree : empty_tree();
+ next_tree = next ? get_commit_tree(next) : empty_tree();
+ base_tree = base ? get_commit_tree(base) : empty_tree();
for (xopt = opts->xopts; xopt != opts->xopts + opts->xopts_nr; xopt++)
parse_merge_opt(&o, *xopt);
@@ -561,7 +561,7 @@ static int is_index_unchanged(void)
return error(_("unable to update cache tree"));
return !oidcmp(&active_cache_tree->oid,
- &head_commit->maybe_tree->object.oid);
+ get_commit_tree_oid(head_commit));
}
static int write_author_script(const char *message)
@@ -1118,7 +1118,7 @@ static int try_to_commit(struct strbuf *msg, const char *author,
}
if (!(flags & ALLOW_EMPTY) && !oidcmp(current_head ?
- ¤t_head->maybe_tree->object.oid :
+ get_commit_tree_oid(current_head) :
&empty_tree_oid, &tree)) {
res = 1; /* run 'git commit' to display error message */
goto out;
@@ -1216,12 +1216,12 @@ static int is_original_commit_empty(struct commit *commit)
if (parse_commit(parent))
return error(_("could not parse parent commit %s"),
oid_to_hex(&parent->object.oid));
- ptree_oid = &parent->maybe_tree->object.oid;
+ ptree_oid = get_commit_tree_oid(parent);
} else {
ptree_oid = the_hash_algo->empty_tree; /* commit is root */
}
- return !oidcmp(ptree_oid, &commit->maybe_tree->object.oid);
+ return !oidcmp(ptree_oid, get_commit_tree_oid(commit));
}
/*
diff --git a/sha1_name.c b/sha1_name.c
index 26b22cb628..0f408e9143 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -896,7 +896,7 @@ struct object *peel_to_type(const char *name, int namelen,
if (o->type == OBJ_TAG)
o = ((struct tag*) o)->tagged;
else if (o->type == OBJ_COMMIT)
- o = &(((struct commit *) o)->maybe_tree->object);
+ o = &(get_commit_tree(((struct commit *)o))->object);
else {
if (name)
error("%.*s: expected %s type, but the object "
diff --git a/tree.c b/tree.c
index dbc5e0be54..dec53f3eca 100644
--- a/tree.c
+++ b/tree.c
@@ -109,7 +109,7 @@ static int read_tree_1(struct tree *tree, struct strbuf *base,
oid_to_hex(entry.oid),
base->buf, entry.path);
- oidcpy(&oid, &commit->maybe_tree->object.oid);
+ oidcpy(&oid, get_commit_tree_oid(commit));
}
else
continue;
@@ -248,7 +248,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid)
if (obj->type == OBJ_TREE)
return (struct tree *) obj;
else if (obj->type == OBJ_COMMIT)
- obj = &(((struct commit *) obj)->maybe_tree->object);
+ obj = &(get_commit_tree(((struct commit *)obj))->object);
else if (obj->type == OBJ_TAG)
obj = ((struct tag *) obj)->tagged;
else
diff --git a/walker.c b/walker.c
index 1d5f3059a2..f51b855872 100644
--- a/walker.c
+++ b/walker.c
@@ -87,7 +87,7 @@ static int process_commit(struct walker *walker, struct commit *commit)
walker_say(walker, "walk %s\n", oid_to_hex(&commit->object.oid));
if (walker->get_tree) {
- if (process(walker, &commit->maybe_tree->object))
+ if (process(walker, &get_commit_tree(commit)->object))
return -1;
if (!walker->get_all)
walker->get_tree = 0;
--
2.17.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH v2 4/4] commit-graph: lazy-load trees for commits
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
` (2 preceding siblings ...)
2018-04-06 19:09 ` [PATCH v2 3/4] treewide: replace maybe_tree with accessor methods Derrick Stolee
@ 2018-04-06 19:09 ` Derrick Stolee
2018-04-06 19:21 ` [PATCH v2 0/4] Lazy-load trees when reading commit-graph Jeff King
2018-04-07 18:40 ` Jakub Narebski
5 siblings, 0 replies; 26+ messages in thread
From: Derrick Stolee @ 2018-04-06 19:09 UTC (permalink / raw)
To: git; +Cc: peff, sbeller, avarab, larsxschneider, gitster, Derrick Stolee
The commit-graph file provides quick access to commit data, including
the OID of the root tree for each commit in the graph. When performing
a deep commit-graph walk, we may not need to load most of the trees
for these commits.
Delay loading the tree object for a commit loaded from the graph
until requested via get_commit_tree(). Do not lazy-load trees for
commits not in the graph, since that requires duplicate parsing
and the relative peformance improvement when trees are not needed
is small.
On the Linux repository, performance tests were run for the following
command:
git log --graph --oneline -1000
Before: 0.92s
After: 0.66s
Rel %: -28.3%
Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measureable performance
change as a result of this patch.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
commit-graph.c | 26 +++++++++++++++++++++++---
commit-graph.h | 2 ++
commit.c | 8 +++++++-
commit.h | 6 ++++++
4 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/commit-graph.c b/commit-graph.c
index 9f37d84209..a5de6f3102 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g,
static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
{
- struct object_id oid;
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
@@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
item->object.parsed = 1;
item->graph_pos = pos;
- hashcpy(oid.hash, commit_data);
- item->maybe_tree = lookup_tree(&oid);
+ item->maybe_tree = NULL;
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
@@ -317,6 +315,28 @@ int parse_commit_in_graph(struct commit *item)
return 0;
}
+static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
+{
+ struct object_id oid;
+ const unsigned char *commit_data = g->chunk_commit_data +
+ GRAPH_DATA_WIDTH * (c->graph_pos);
+
+ hashcpy(oid.hash, commit_data);
+ c->maybe_tree = lookup_tree(&oid);
+
+ return c->maybe_tree;
+}
+
+struct tree *get_commit_tree_in_graph(const struct commit *c)
+{
+ if (c->maybe_tree)
+ return c->maybe_tree;
+ if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
+ BUG("get_commit_tree_in_graph called from non-commit-graph commit");
+
+ return load_tree_for_commit(commit_graph, (struct commit *)c);
+}
+
static void write_graph_chunk_fanout(struct hashfile *f,
struct commit **commits,
int nr_commits)
diff --git a/commit-graph.h b/commit-graph.h
index e1d8580c98..260a468e73 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -17,6 +17,8 @@ char *get_commit_graph_filename(const char *obj_dir);
*/
int parse_commit_in_graph(struct commit *item);
+struct tree *get_commit_tree_in_graph(const struct commit *c);
+
struct commit_graph {
int graph_fd;
diff --git a/commit.c b/commit.c
index aea2ca1f8b..711f674c18 100644
--- a/commit.c
+++ b/commit.c
@@ -298,7 +298,13 @@ void free_commit_buffer(struct commit *commit)
struct tree *get_commit_tree(const struct commit *commit)
{
- return commit->maybe_tree;
+ if (commit->maybe_tree || !commit->object.parsed)
+ return commit->maybe_tree;
+
+ if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+ BUG("commit has NULL tree, but was not loaded from commit-graph");
+
+ return get_commit_tree_in_graph(commit);
}
struct object_id *get_commit_tree_oid(const struct commit *commit)
diff --git a/commit.h b/commit.h
index dc4bf97d9f..23a3f364ed 100644
--- a/commit.h
+++ b/commit.h
@@ -22,6 +22,12 @@ struct commit {
unsigned int index;
timestamp_t date;
struct commit_list *parents;
+
+ /*
+ * If the commit is loaded from the commit-graph file, then this
+ * member may be NULL. Only access it through get_commit_tree()
+ * or get_commit_tree_oid().
+ */
struct tree *maybe_tree;
uint32_t graph_pos;
};
--
2.17.0
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
` (3 preceding siblings ...)
2018-04-06 19:09 ` [PATCH v2 4/4] commit-graph: lazy-load trees for commits Derrick Stolee
@ 2018-04-06 19:21 ` Jeff King
2018-04-06 19:41 ` Derrick Stolee
` (2 more replies)
2018-04-07 18:40 ` Jakub Narebski
5 siblings, 3 replies; 26+ messages in thread
From: Jeff King @ 2018-04-06 19:21 UTC (permalink / raw)
To: Derrick Stolee; +Cc: git, sbeller, avarab, larsxschneider, gitster
On Fri, Apr 06, 2018 at 07:09:30PM +0000, Derrick Stolee wrote:
> Derrick Stolee (4):
> treewide: rename tree to maybe_tree
> commit: create get_commit_tree() method
> treewide: replace maybe_tree with accessor methods
> commit-graph: lazy-load trees for commits
I gave this only a cursory read, but it addresses my concern from the
previous round.
If I were doing it myself, I probably would have folded patches 1 and 3
together. They are touching all the same spots, and it would be an error
for any case converted in patch 1 to not get converted in patch 3. I'm
assuming you caught them all due to Coccinelle, though IMHO it is
somewhat overkill here. By folding them together the compiler could tell
you which spots you missed.
And going forward, I doubt it is going to be a common error for people
to use maybe_tree directly. Between the name and the warning comment,
you'd have to really try to shoot yourself in the foot with it. The
primary concern was catching people using the existing "tree" name,
whose semantics changed.
All that said, I'm fine with having it done this way, too.
-Peff
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-06 19:21 ` [PATCH v2 0/4] Lazy-load trees when reading commit-graph Jeff King
@ 2018-04-06 19:41 ` Derrick Stolee
2018-04-06 19:45 ` Stefan Beller
2018-04-08 23:18 ` Junio C Hamano
2 siblings, 0 replies; 26+ messages in thread
From: Derrick Stolee @ 2018-04-06 19:41 UTC (permalink / raw)
To: Jeff King, Derrick Stolee; +Cc: git, sbeller, avarab, larsxschneider, gitster
On 4/6/2018 3:21 PM, Jeff King wrote:
> On Fri, Apr 06, 2018 at 07:09:30PM +0000, Derrick Stolee wrote:
>
>> Derrick Stolee (4):
>> treewide: rename tree to maybe_tree
>> commit: create get_commit_tree() method
>> treewide: replace maybe_tree with accessor methods
>> commit-graph: lazy-load trees for commits
> I gave this only a cursory read, but it addresses my concern from the
> previous round.
>
> If I were doing it myself, I probably would have folded patches 1 and 3
> together. They are touching all the same spots, and it would be an error
> for any case converted in patch 1 to not get converted in patch 3. I'm
> assuming you caught them all due to Coccinelle, though IMHO it is
> somewhat overkill here. By folding them together the compiler could tell
> you which spots you missed.
>
> And going forward, I doubt it is going to be a common error for people
> to use maybe_tree directly. Between the name and the warning comment,
> you'd have to really try to shoot yourself in the foot with it. The
> primary concern was catching people using the existing "tree" name,
> whose semantics changed.
>
> All that said, I'm fine with having it done this way, too.
Thanks. As a double-check that I caught all of the 'maybe_tree'
accesses, I ran the following:
$ git grep maybe_tree | grep -v get_commit_tree
commit-graph.c: item->maybe_tree = NULL;
commit-graph.c: c->maybe_tree = lookup_tree(&oid);
commit-graph.c: return c->maybe_tree;
commit-graph.c: if (c->maybe_tree)
commit-graph.c: return c->maybe_tree;
commit.c: if (commit->maybe_tree || !commit->object.parsed)
commit.c: return commit->maybe_tree;
commit.c: item->maybe_tree = lookup_tree(&parent);
commit.h: struct tree *maybe_tree;
contrib/coccinelle/commit.cocci:- &c->maybe_tree->object.oid
contrib/coccinelle/commit.cocci:- c->maybe_tree->object.oid.hash
contrib/coccinelle/commit.cocci:- c->maybe_tree
contrib/coccinelle/commit.cocci:+ c->maybe_tree = s
contrib/coccinelle/commit.cocci:+ return c->maybe_tree;
merge-recursive.c: commit->maybe_tree = tree;
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-06 19:21 ` [PATCH v2 0/4] Lazy-load trees when reading commit-graph Jeff King
2018-04-06 19:41 ` Derrick Stolee
@ 2018-04-06 19:45 ` Stefan Beller
2018-04-08 23:18 ` Junio C Hamano
2 siblings, 0 replies; 26+ messages in thread
From: Stefan Beller @ 2018-04-06 19:45 UTC (permalink / raw)
To: Jeff King; +Cc: Derrick Stolee, git, avarab, larsxschneider, gitster
On Fri, Apr 6, 2018 at 12:21 PM, Jeff King <peff@peff.net> wrote:
> On Fri, Apr 06, 2018 at 07:09:30PM +0000, Derrick Stolee wrote:
>
>> Derrick Stolee (4):
>> treewide: rename tree to maybe_tree
>> commit: create get_commit_tree() method
>> treewide: replace maybe_tree with accessor methods
>> commit-graph: lazy-load trees for commits
>
> I gave this only a cursory read, but it addresses my concern from the
> previous round.
Same here.
Thanks,
Stefan
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-06 19:21 ` [PATCH v2 0/4] Lazy-load trees when reading commit-graph Jeff King
2018-04-06 19:41 ` Derrick Stolee
2018-04-06 19:45 ` Stefan Beller
@ 2018-04-08 23:18 ` Junio C Hamano
2018-04-09 13:15 ` Derrick Stolee
2 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2018-04-08 23:18 UTC (permalink / raw)
To: Jeff King; +Cc: Derrick Stolee, git, sbeller, avarab, larsxschneider
Jeff King <peff@peff.net> writes:
> If I were doing it myself, I probably would have folded patches 1 and 3
> together. They are touching all the same spots, and it would be an error
> for any case converted in patch 1 to not get converted in patch 3. I'm
> assuming you caught them all due to Coccinelle, though IMHO it is
> somewhat overkill here. By folding them together the compiler could tell
> you which spots you missed.
Yeah, that approach would probably be a more sensible way to assure
the safety/correctness of the result to readers better.
>
> And going forward, I doubt it is going to be a common error for people
> to use maybe_tree directly. Between the name and the warning comment,
> you'd have to really try to shoot yourself in the foot with it. The
> primary concern was catching people using the existing "tree" name,
> whose semantics changed.
Yup.
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-08 23:18 ` Junio C Hamano
@ 2018-04-09 13:15 ` Derrick Stolee
2018-04-09 17:25 ` Stefan Beller
0 siblings, 1 reply; 26+ messages in thread
From: Derrick Stolee @ 2018-04-09 13:15 UTC (permalink / raw)
To: Junio C Hamano, Jeff King
Cc: Derrick Stolee, git, sbeller, avarab, larsxschneider
On 4/8/2018 7:18 PM, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
>
>> If I were doing it myself, I probably would have folded patches 1 and 3
>> together. They are touching all the same spots, and it would be an error
>> for any case converted in patch 1 to not get converted in patch 3. I'm
>> assuming you caught them all due to Coccinelle, though IMHO it is
>> somewhat overkill here. By folding them together the compiler could tell
>> you which spots you missed.
> Yeah, that approach would probably be a more sensible way to assure
> the safety/correctness of the result to readers better.
I don't understand how folding the patches makes the correctness
clearer, since the rename (1/4) is checked by the compiler and the
Coccinelle script (3/4) only works after that rename is complete.
The only thing I can imagine is that it makes smaller patch emails,
since there is only one large patch instead of two. In this case, I
prefer to make changes that are easier to check by automation (compiler
and coccinelle).
However, I will defer to the experts in this regard. If a v3 is
necessary, then I will fold the commits together.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-09 13:15 ` Derrick Stolee
@ 2018-04-09 17:25 ` Stefan Beller
0 siblings, 0 replies; 26+ messages in thread
From: Stefan Beller @ 2018-04-09 17:25 UTC (permalink / raw)
To: Derrick Stolee
Cc: Junio C Hamano, Jeff King, Derrick Stolee, git, avarab, larsxschneider
On Mon, Apr 9, 2018 at 6:15 AM, Derrick Stolee <stolee@gmail.com> wrote:
> I don't understand how folding the patches makes the correctness clearer,
> since the rename (1/4) is checked by the compiler and the Coccinelle script
> (3/4) only works after that rename is complete.
>
> The only thing I can imagine is that it makes smaller patch emails, since
> there is only one large patch instead of two. In this case, I prefer to make
> changes that are easier to check by automation (compiler and coccinelle).
>
I prefer the offloading to automation, too.
So I would vouch for keeping it as-is.
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-06 19:09 ` [PATCH v2 0/4] " Derrick Stolee
` (4 preceding siblings ...)
2018-04-06 19:21 ` [PATCH v2 0/4] Lazy-load trees when reading commit-graph Jeff King
@ 2018-04-07 18:40 ` Jakub Narebski
2018-04-08 1:17 ` Derrick Stolee
5 siblings, 1 reply; 26+ messages in thread
From: Jakub Narebski @ 2018-04-07 18:40 UTC (permalink / raw)
To: Derrick Stolee; +Cc: git, peff, sbeller, avarab, larsxschneider, gitster
Derrick Stolee <dstolee@microsoft.com> writes:
[...]
> On the Linux repository, performance tests were run for the following
> command:
>
> git log --graph --oneline -1000
>
> Before: 0.92s
> After: 0.66s
> Rel %: -28.3%
>
> Adding '-- kernel/' to the command requires loading the root tree
> for every commit that is walked. There was no measureable performance
> change as a result of this patch.
In the "Git Merge contributor summit notes" [1] one can read that:
> - VSTS adds bloom filters to know which paths have changed on the commit
> - tree-same check in the bloom filter is fast; speeds up file history checks
> - if the file history is _very_ sparse, then bloom filter is useful
Could this method speed up also the second case mentioned here? Can
anyone explain how this "path-changed bloom filter" works in VSTS?
Could we add something like this to the commit-graph file?
[1]: https://public-inbox.org/git/alpine.DEB.2.20.1803091557510.23109@alexmv-linux/
Best regards,
--
Jakub Narębski
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-07 18:40 ` Jakub Narebski
@ 2018-04-08 1:17 ` Derrick Stolee
2018-04-11 20:41 ` Jakub Narebski
0 siblings, 1 reply; 26+ messages in thread
From: Derrick Stolee @ 2018-04-08 1:17 UTC (permalink / raw)
To: Jakub Narebski, Derrick Stolee
Cc: git, peff, sbeller, avarab, larsxschneider, gitster
On 4/7/2018 2:40 PM, Jakub Narebski wrote:
> Derrick Stolee <dstolee@microsoft.com> writes:
>
> [...]
>> On the Linux repository, performance tests were run for the following
>> command:
>>
>> git log --graph --oneline -1000
>>
>> Before: 0.92s
>> After: 0.66s
>> Rel %: -28.3%
>>
>> Adding '-- kernel/' to the command requires loading the root tree
>> for every commit that is walked. There was no measureable performance
>> change as a result of this patch.
> In the "Git Merge contributor summit notes" [1] one can read that:
>
>> - VSTS adds bloom filters to know which paths have changed on the commit
>> - tree-same check in the bloom filter is fast; speeds up file history checks
>> - if the file history is _very_ sparse, then bloom filter is useful
> Could this method speed up also the second case mentioned here? Can
> anyone explain how this "path-changed bloom filter" works in VSTS?
>
The idea is simple: for every commit, store a Bloom filter containing
the list of paths that are not TREESAME against the first parent. (A
slight detail: have a max cap on the number of paths, and store simply
"TOO_BIG" for commits with too many diffs.)
When performing 'git log -- path' queries, the most important detail for
considering how to advance the walk is whether the commit is TREESAME to
its first parent. For a deep path in a large repo, this is almost always
true. When a Bloom filter says "TREESAME" (i.e. "this path is not in my
set") it is always correct, so we can set the treesame bit and continue
without walking any trees. When a Bloom filter says "MAYBE NOT TREESAME"
(i.e. "this path is probably in my set") you only need to do the same
work as before: walk the trees to compare against your first parent.
If a Bloom filter has a false-positive rate of X%, then you can possibly
drop your number of tree comparisons by (100-X)%. This is very important
for large repos where some paths were changed only ten times or so, the
full graph needs to be walked and it is helpful to avoid parsing too
many trees.
> Could we add something like this to the commit-graph file?
I'm not sure if it is necessary for client-side operations, but it is
one of the reasons the commit-graph file has the idea of an "optional
chunk". It could be added to the file format (without changing version
numbers) and be ignored by clients that don't understand it. I could
also be gated by a config setting for computing them. My guess is that
only server-side operations will need the added response time, and can
bear the cost of computing them when writing the commit-graph file.
Clients are less likely to be patient waiting for a lot of diff
calculations.
If we add commit-graph file downloads to the protocol, then the server
could do this computation and send the data to all clients. But that
would be "secondary" information that maybe clients want to verify,
which is as difficult as computing it themselves.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph
2018-04-08 1:17 ` Derrick Stolee
@ 2018-04-11 20:41 ` Jakub Narebski
0 siblings, 0 replies; 26+ messages in thread
From: Jakub Narebski @ 2018-04-11 20:41 UTC (permalink / raw)
To: Derrick Stolee
Cc: Derrick Stolee, git, peff, sbeller, avarab, larsxschneider, gitster
Derrick Stolee <stolee@gmail.com> writes:
> On 4/7/2018 2:40 PM, Jakub Narebski wrote:
>> Derrick Stolee <dstolee@microsoft.com> writes:
>>
>> [...]
>>> On the Linux repository, performance tests were run for the following
>>> command:
>>>
>>> git log --graph --oneline -1000
>>>
>>> Before: 0.92s
>>> After: 0.66s
>>> Rel %: -28.3%
>>>
>>> Adding '-- kernel/' to the command requires loading the root tree
>>> for every commit that is walked. There was no measureable performance
>>> change as a result of this patch.
>>
>> In the "Git Merge contributor summit notes" [1] one can read that:
>>
>>> - VSTS adds bloom filters to know which paths have changed on the commit
>>> - tree-same check in the bloom filter is fast; speeds up file history checks
>>> - if the file history is _very_ sparse, then bloom filter is useful
>>
>> Could this method speed up also the second case mentioned here? Can
>> anyone explain how this "path-changed bloom filter" works in VSTS?
>>
>
> The idea is simple: for every commit, store a Bloom filter containing
> the list of paths that are not TREESAME against the first parent. (A
> slight detail: have a max cap on the number of paths, and store simply
> "TOO_BIG" for commits with too many diffs.)
Isn't Bloom filter with all bits set to 1 equivalent of "too big"? It
would answer each query with "possibly in set, check it".
> When performing 'git log -- path' queries, the most important detail
> for considering how to advance the walk is whether the commit is
> TREESAME to its first parent. For a deep path in a large repo, this is
> almost always true. When a Bloom filter says "TREESAME" (i.e. "this
> path is not in my set") it is always correct, so we can set the
> treesame bit and continue without walking any trees. When a Bloom
> filter says "MAYBE NOT TREESAME" (i.e. "this path is probably in my
> set") you only need to do the same work as before: walk the trees to
> compare against your first parent.
>
> If a Bloom filter has a false-positive rate of X%, then you can
> possibly drop your number of tree comparisons by (100-X)%. This is
> very important for large repos where some paths were changed only ten
> times or so, the full graph needs to be walked and it is helpful to
> avoid parsing too many trees.
So this works only for exact file pathnames, or does checking for
subdirectory also work? I guess it cannot work for globbing patterns
(like "git log -- '*.c'").
I guess that it speeds up "git log -- <pathname>", but also "git blame
<pathname>".
Sidenote: I have stumbled upon https://github.com/efficient/ffbf project
(fast-forward Bllom filters - for exact matching of large number of
patterns), where they invent cache-efficient version of Bloom filter
algorithm. The paper that the project is based on also might be
interesting, if only because it points to other improvements of classic
Bloom filters.
>> Could we add something like this to the commit-graph file?
>
> I'm not sure if it is necessary for client-side operations, but it is
> one of the reasons the commit-graph file has the idea of an "optional
> chunk". It could be added to the file format (without changing version
> numbers) and be ignored by clients that don't understand it. I could
> also be gated by a config setting for computing them. My guess is that
> only server-side operations will need the added response time, and can
> bear the cost of computing them when writing the commit-graph
> file. Clients are less likely to be patient waiting for a lot of diff
> calculations.
So the problem is performing large amount of diff calculations. I
wonder if it would be possible to add Bloom filters incrementally, when
for some reason we need to calculate diff anyway.
>
> If we add commit-graph file downloads to the protocol, then the server
> could do this computation and send the data to all clients. But that
> would be "secondary" information that maybe clients want to verify,
> which is as difficult as computing it themselves.
Can you tell us how much work (how much time) must spend the server to
calculate those per-commit "files changed" Bloom fillters?
Best,
--
Jakub Narębski
^ permalink raw reply [flat|nested] 26+ messages in thread