Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] git-rev-list: add --exclude-path-first-parent flag
@ 2021-04-17  0:15 Jerry Zhang
  2021-04-17  0:45 ` Junio C Hamano
  2021-04-21  0:48 ` [PATCH V2] git-rev-list: add --first-parent-not flag Jerry Zhang
  0 siblings, 2 replies; 7+ messages in thread
From: Jerry Zhang @ 2021-04-17  0:15 UTC (permalink / raw)
  To: git, gitster; +Cc: ross, abe, brian.kubisiak, Jerry Zhang

Add the --exclude-path-first-parent flag,
which works similarly to --first-parent,
but affects only the graph traversal for
the set of commits being excluded.

   -A-------E-HEAD
     \     /
      B-C-D

In this example, the goal is to return the
set {B, C, D} which represents a working
branch that has been merged into main branch
E. `git rev-list D ^E` will end up returning
no commits since the exclude path eliminates
D and its ancestors.
`git rev-list --exclude-path-first-parent D ^E`
however will return {B, C, D} as desired.

Add docs for the new flag, and clarify the
doc for --first-parent to indicate that it
applies to traversing the set of included
commits only. The semantics of existing flags
however have not changed.

Signed-off-by: Jerry Zhang <jerry@skydio.com>
---
 Documentation/rev-list-options.txt | 21 ++++++++++++++-------
 blame.c                            |  2 +-
 revision.c                         | 30 ++++++++++++++++++++----------
 revision.h                         |  3 ++-
 shallow.c                          |  2 +-
 t/t6012-rev-list-simplify.sh       | 18 ++++++++++++------
 6 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index b1c8f86c6e..681d6649b4 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -122,19 +122,26 @@ again.  Equivalent forms are `--min-parents=0` (any commit has 0 or more
 parents) and `--max-parents=-1` (negative numbers denote no upper limit).
 
 --first-parent::
-	Follow only the first parent commit upon seeing a merge
-	commit.  This option can give a better overview when
-	viewing the evolution of a particular topic branch,
-	because merges into a topic branch tend to be only about
-	adjusting to updated upstream from time to time, and
-	this option allows you to ignore the individual commits
-	brought in to your history by such a merge.
+	When finding commits to include, follow only the first
+	parent commit upon seeing a merge commit.  This option
+	can give a better overview when viewing the evolution of
+	a particular topic branch, because merges into a topic
+	branch tend to be only about adjusting to updated upstream
+	from time to time, and this option allows you to ignore
+	the individual commits brought in to your history by such
+	a merge.
 ifdef::git-log[]
 +
 This option also changes default diff format for merge commits
 to `first-parent`, see `--diff-merges=first-parent` for details.
 endif::git-log[]
 
+--exclude-path-first-parent::
+	When finding commits to exclude, follow only the first
+	parent commit upon seeing a merge commit.  This causes
+	exclusions to exclude only commits on that branch itself
+	and not those brought in by a merge.
+
 --not::
 	Reverses the meaning of the '{caret}' prefix (or lack thereof)
 	for all following revision specifiers, up to the next `--not`.
diff --git a/blame.c b/blame.c
index 5018bb8fb2..8354dc0252 100644
--- a/blame.c
+++ b/blame.c
@@ -2615,7 +2615,7 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 		else {
 			commit->object.flags |= UNINTERESTING;
 			if (commit->object.parsed)
-				mark_parents_uninteresting(commit);
+				mark_parents_uninteresting(sb->revs, commit);
 		}
 		/* treat root commit as boundary */
 		if (!commit->parents && !sb->show_root)
diff --git a/revision.c b/revision.c
index 553c0faa9b..e1deb3f194 100644
--- a/revision.c
+++ b/revision.c
@@ -268,7 +268,7 @@ static void commit_stack_clear(struct commit_stack *stack)
 	stack->nr = stack->alloc = 0;
 }
 
-static void mark_one_parent_uninteresting(struct commit *commit,
+static void mark_one_parent_uninteresting(struct rev_info *revs, struct commit *commit,
 					  struct commit_stack *pending)
 {
 	struct commit_list *l;
@@ -285,20 +285,26 @@ static void mark_one_parent_uninteresting(struct commit *commit,
 	 * wasn't uninteresting), in which case we need
 	 * to mark its parents recursively too..
 	 */
-	for (l = commit->parents; l; l = l->next)
+	for (l = commit->parents; l; l = l->next) {
 		commit_stack_push(pending, l->item);
+		if (revs && revs->exclude_path_first_parent_only)
+			break;
+	}
 }
 
-void mark_parents_uninteresting(struct commit *commit)
+void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit)
 {
 	struct commit_stack pending = COMMIT_STACK_INIT;
 	struct commit_list *l;
 
-	for (l = commit->parents; l; l = l->next)
-		mark_one_parent_uninteresting(l->item, &pending);
+	for (l = commit->parents; l; l = l->next) {
+		mark_one_parent_uninteresting(revs, l->item, &pending);
+		if (revs && revs->exclude_path_first_parent_only)
+			break;
+	}
 
 	while (pending.nr > 0)
-		mark_one_parent_uninteresting(commit_stack_pop(&pending),
+		mark_one_parent_uninteresting(revs, commit_stack_pop(&pending),
 					      &pending);
 
 	commit_stack_clear(&pending);
@@ -437,7 +443,7 @@ static struct commit *handle_commit(struct rev_info *revs,
 		if (repo_parse_commit(revs->repo, commit) < 0)
 			die("unable to parse commit %s", name);
 		if (flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
+			mark_parents_uninteresting(revs, commit);
 
 			if (!revs->topo_order || !generation_numbers_enabled(the_repository))
 				revs->limited = 1;
@@ -1120,7 +1126,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 			if (repo_parse_commit_gently(revs->repo, p, 1) < 0)
 				continue;
 			if (p->parents)
-				mark_parents_uninteresting(p);
+				mark_parents_uninteresting(revs, p);
 			if (p->object.flags & SEEN)
 				continue;
 			p->object.flags |= SEEN;
@@ -1128,6 +1134,8 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
+			if (revs->exclude_path_first_parent_only)
+				break;
 		}
 		return 0;
 	}
@@ -1418,7 +1426,7 @@ static int limit_list(struct rev_info *revs)
 		if (process_parents(revs, commit, &list, NULL) < 0)
 			return -1;
 		if (obj->flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
+			mark_parents_uninteresting(revs, commit);
 			slop = still_interesting(list, date, slop, &interesting_cache);
 			if (slop)
 				continue;
@@ -2214,6 +2222,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 		return argcount;
 	} else if (!strcmp(arg, "--first-parent")) {
 		revs->first_parent_only = 1;
+	} else if (!strcmp(arg, "--exclude-path-first-parent")) {
+		revs->exclude_path_first_parent_only = 1;
 	} else if (!strcmp(arg, "--ancestry-path")) {
 		revs->ancestry_path = 1;
 		revs->simplify_history = 0;
@@ -3341,7 +3351,7 @@ static void explore_walk_step(struct rev_info *revs)
 		return;
 
 	if (c->object.flags & UNINTERESTING)
-		mark_parents_uninteresting(c);
+		mark_parents_uninteresting(revs, c);
 
 	for (p = c->parents; p; p = p->next)
 		test_flag_and_insert(&info->explore_queue, p->item, TOPO_WALK_EXPLORED);
diff --git a/revision.h b/revision.h
index a24f72dcd1..712710ed62 100644
--- a/revision.h
+++ b/revision.h
@@ -164,6 +164,7 @@ struct rev_info {
 			bisect:1,
 			ancestry_path:1,
 			first_parent_only:1,
+			exclude_path_first_parent_only:1,
 			line_level_traverse:1,
 			tree_blobs_in_commit_order:1,
 
@@ -405,7 +406,7 @@ const char *get_revision_mark(const struct rev_info *revs,
 void put_revision_mark(const struct rev_info *revs,
 		       const struct commit *commit);
 
-void mark_parents_uninteresting(struct commit *commit);
+void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit);
 void mark_tree_uninteresting(struct repository *r, struct tree *tree);
 void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
 
diff --git a/shallow.c b/shallow.c
index 9ed18eb884..71e5876f37 100644
--- a/shallow.c
+++ b/shallow.c
@@ -603,7 +603,7 @@ static int mark_uninteresting(const char *refname, const struct object_id *oid,
 	if (!commit)
 		return 0;
 	commit->object.flags |= UNINTERESTING;
-	mark_parents_uninteresting(commit);
+	mark_parents_uninteresting(NULL, commit);
 	return 0;
 }
 
diff --git a/t/t6012-rev-list-simplify.sh b/t/t6012-rev-list-simplify.sh
index 4f7fa8b6c0..aa8220f3a4 100755
--- a/t/t6012-rev-list-simplify.sh
+++ b/t/t6012-rev-list-simplify.sh
@@ -16,13 +16,12 @@ unnote () {
 }
 
 #
-# Create a test repo with interesting commit graph:
+# Create a test repo with an interesting commit graph:
 #
-# A--B----------G--H--I--K--L
-#  \  \           /     /
-#   \  \         /     /
-#    C------E---F     J
-#        \_/
+# A-----B-----G--H--I--K--L
+#  \     \      /     /
+#   \     \    /     /
+#    C--D--E--F     J
 #
 # The commits are laid out from left-to-right starting with
 # the root commit A and terminating at the tip commit L.
@@ -142,6 +141,13 @@ check_result 'I B A' --author-date-order -- file
 check_result 'H' --first-parent -- another-file
 check_result 'H' --first-parent --topo-order -- another-file
 
+check_result 'L K I H G B A' --first-parent L
+check_result 'F E D C' --exclude-path-first-parent F ^L
+check_result '' F ^L
+check_result 'L K I H G J' L ^F
+check_result 'L K I H G B J' --exclude-path-first-parent L ^F
+check_result 'L K I H G B' --exclude-path-first-parent --first-parent L ^F
+
 check_result 'E C B A' --full-history E -- lost
 test_expect_success 'full history simplification without parent' '
 	printf "%s\n" E C B A >expect &&
-- 
2.29.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-rev-list: add --exclude-path-first-parent flag
  2021-04-17  0:15 [PATCH] git-rev-list: add --exclude-path-first-parent flag Jerry Zhang
@ 2021-04-17  0:45 ` Junio C Hamano
  2021-04-17  1:07   ` Jerry Zhang
  2021-04-21  0:48 ` [PATCH V2] git-rev-list: add --first-parent-not flag Jerry Zhang
  1 sibling, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2021-04-17  0:45 UTC (permalink / raw)
  To: Jerry Zhang; +Cc: git, ross, abe, brian.kubisiak

Jerry Zhang <jerry@skydio.com> writes:

> Add the --exclude-path-first-parent flag,
> which works similarly to --first-parent,
> but affects only the graph traversal for
> the set of commits being excluded.
>
>    -A-------E-HEAD
>      \     /
>       B-C-D
>
> In this example, the goal is to return the
> set {B, C, D} which represents a working
> branch that has been merged into main branch
> E. `git rev-list D ^E` will end up returning
> no commits since the exclude path eliminates
> D and its ancestors.
> `git rev-list --exclude-path-first-parent D ^E`
> however will return {B, C, D} as desired.

It is not clera why you want to have this, instead of doing a more
obvious "D..E^".  Even better is "E^..E", which is often what you
want when viewing a history like my 'seen' that is a straight-line
into which tips of branches are merged.

E^..E (or doing the same for any commit on the mainline in such a
history whose first-parent chain solely consists of merges) would
show the list of the commits that came from the side branch that was
merged, plus the merge commit, where the committer who created a
merge would hava a chance to give a summary of what happened on the
side branch.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-rev-list: add --exclude-path-first-parent flag
  2021-04-17  0:45 ` Junio C Hamano
@ 2021-04-17  1:07   ` Jerry Zhang
  2021-04-17  4:09     ` Felipe Contreras
  2021-04-17  7:22     ` Junio C Hamano
  0 siblings, 2 replies; 7+ messages in thread
From: Jerry Zhang @ 2021-04-17  1:07 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git Mailing List, Ross Yeager, Abraham Bachrach, Brian Kubisiak

On Fri, Apr 16, 2021 at 5:45 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Jerry Zhang <jerry@skydio.com> writes:
>
> > Add the --exclude-path-first-parent flag,
> > which works similarly to --first-parent,
> > but affects only the graph traversal for
> > the set of commits being excluded.
> >
> >    -A-------E-HEAD
> >      \     /
> >       B-C-D
> >
> > In this example, the goal is to return the
> > set {B, C, D} which represents a working
> > branch that has been merged into main branch
> > E. `git rev-list D ^E` will end up returning
> > no commits since the exclude path eliminates
> > D and its ancestors.
> > `git rev-list --exclude-path-first-parent D ^E`
> > however will return {B, C, D} as desired.
>
> It is not clera why you want to have this, instead of doing a more
> obvious "D..E^".  Even better is "E^..E", which is often what you
> want when viewing a history like my 'seen' that is a straight-line
> into which tips of branches are merged.
My motivation is to find the point at which a release branch forked off from
a main branch, even though the release branch could have been merged
into the main branch multiple times since it was forked off.

If we add another merge from release to main, it will be more clear
that those give different results:

        -A-------E-F-main
          \   / /
           B-C-D-release

`git rev-list --exclude-path-first-parent release ^main` returns {B, C, D}.
I've added commit F to show that we don't necessarily have info on E,
there could be many commits between it and the tip of main.

`git rev-list E^..E` returns {E, D} only, it depends on us knowing E and
loses all the commits after a merge from release to main.

I could do `git rev-parse (git rev-list --first-parent main
^release)^` and I'd get
A, but then I have to run `git rev-list D ^A` to get the set {B, C, D},
whereas the --exclude-path-first-parent invocation gives exactly
what I want in one invocation. I'm sure there's more use-cases
I'm not able to think of.

>
> E^..E (or doing the same for any commit on the mainline in such a
> history whose first-parent chain solely consists of merges) would
> show the list of the commits that came from the side branch that was
> merged, plus the merge commit, where the committer who created a
> merge would hava a chance to give a summary of what happened on the
> side branch.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-rev-list: add --exclude-path-first-parent flag
  2021-04-17  1:07   ` Jerry Zhang
@ 2021-04-17  4:09     ` Felipe Contreras
  2021-04-17  7:22     ` Junio C Hamano
  1 sibling, 0 replies; 7+ messages in thread
From: Felipe Contreras @ 2021-04-17  4:09 UTC (permalink / raw)
  To: Jerry Zhang, Junio C Hamano
  Cc: Git Mailing List, Ross Yeager, Abraham Bachrach, Brian Kubisiak

Jerry Zhang wrote:
> My motivation is to find the point at which a release branch forked off from
> a main branch, even though the release branch could have been merged
> into the main branch multiple times since it was forked off.

As far as I'm aware this is precisely the only advantage of Mercurial
over Git.

Last time I checked the only way to actually find out the fork point
considering multiple corner cases was to directly store it.

Perhaps --exclude-path-first-parent would be a good approximation.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-rev-list: add --exclude-path-first-parent flag
  2021-04-17  1:07   ` Jerry Zhang
  2021-04-17  4:09     ` Felipe Contreras
@ 2021-04-17  7:22     ` Junio C Hamano
  2021-04-21  0:16       ` Jerry Zhang
  1 sibling, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2021-04-17  7:22 UTC (permalink / raw)
  To: Jerry Zhang
  Cc: Git Mailing List, Ross Yeager, Abraham Bachrach, Brian Kubisiak

Jerry Zhang <jerry@skydio.com> writes:

> On Fri, Apr 16, 2021 at 5:45 PM Junio C Hamano <gitster@pobox.com> wrote:
>>
>> Jerry Zhang <jerry@skydio.com> writes:
>>
>> > Add the --exclude-path-first-parent flag,
>> > which works similarly to --first-parent,
>> > but affects only the graph traversal for
>> > the set of commits being excluded.
>> >
>> >    -A-------E-HEAD
>> >      \     /
>> >       B-C-D
>> >
>> > In this example, the goal is to return the
>> > set {B, C, D} which represents a working
>> > branch that has been merged into main branch
>> > E. `git rev-list D ^E` will end up returning
>> > no commits since the exclude path eliminates
>> > D and its ancestors.
>> > `git rev-list --exclude-path-first-parent D ^E`
>> > however will return {B, C, D} as desired.
>>
>> It is not clera why you want to have this, instead of doing a more
>> obvious "D..E^".  Even better is "E^..E", which is often what you
>> want when viewing a history like my 'seen' that is a straight-line
>> into which tips of branches are merged.
> My motivation is to find the point at which a release branch forked off from
> a main branch, even though the release branch could have been merged
> into the main branch multiple times since it was forked off.
>
> If we add another merge from release to main, it will be more clear
> that those give different results:
>
>         -A-----E-F-main
>           \   / /
>            B-C-D-release
>
> `git rev-list --exclude-path-first-parent release ^main` returns {B, C, D}.
> I've added commit F to show that we don't necessarily have info on E,
> there could be many commits between it and the tip of main.

OK, you meant to deal with repeated merges into integration branch.

So the idea is to just name the end point merge, say F (you also
could name D as the starting point, but see below), and 

 - initially mark its first parent as UNINTERESTING (i.e. E), and
   other parents as INTERESTING (i.e. D).

 - run the revision traversal machinery, but when propagating the
   UNINTERESTING bit, give it only to the first parent.  The second
   and later parents won't become UNINTERESTING.

 - stop after we exhaust INTERESTING commits.

It would probably work for your idealized topology, but I do not
know what happens when there are criss-cross merges.  In the revised
picture, you are merging down from the B-C-D chain into the
mainline, but once the B-C-D chain becomes longer and diverges too
much from the mainline, it becomes tempting to break the "merge only
in one direction" discipline and merge back from the mainline, to
"catch up", and such a merge will have the history of B-C-D line of
development as its first parent.  Would that screw up the selection
of which line of development is uninteresting?

>> > Add the --exclude-path-first-parent flag,
>> > which works similarly to --first-parent,
>> > but affects only the graph traversal for
>> > the set of commits being excluded.
>> >
>> >    -A-------E-HEAD
>> >      \     /
>> >       B-C-D

In any case, it was totally unclear from the proposed log messsage,
and the overlong option name that does not say much did not help me
guess what you wanted to do with it.  Specifically, it is not clear
what "exclude" means (we do not usually use the word in the context
of revision traversal), and when we talk about "path" in the context
of revision traversal, we almost always mean the paths to the files,
i.e. pathspec that limits and simplifies the shape of the history.
Also, it claims that it works similarly to --first-parent, but what
you are doing is to propagate UNINTERESTING bit on the first-parent
chain, which ends up showing the side branch (i.e. B-C-D chain),
without showing the commits on the first-parent chain (A and E).

What are the words that convey the idea behind this operation
clearly at the conceptual level?  Let's think aloud to see if we can
come up with a better name.

 * first parents are unintertesting

 * show commits on side branch(es)

 * follow side branch.

I think that is closer to the problem you are solving, if I
understand what you wrote above correctly.

Perhaps --show-side-branch or --follow-side-branch?  I dunno.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-rev-list: add --exclude-path-first-parent flag
  2021-04-17  7:22     ` Junio C Hamano
@ 2021-04-21  0:16       ` Jerry Zhang
  0 siblings, 0 replies; 7+ messages in thread
From: Jerry Zhang @ 2021-04-21  0:16 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git Mailing List, Ross Yeager, Abraham Bachrach, Brian Kubisiak

On Sat, Apr 17, 2021 at 12:22 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Jerry Zhang <jerry@skydio.com> writes:
>
> > On Fri, Apr 16, 2021 at 5:45 PM Junio C Hamano <gitster@pobox.com> wrote:
> >>
> >> Jerry Zhang <jerry@skydio.com> writes:
> >>
> >> > Add the --exclude-path-first-parent flag,
> >> > which works similarly to --first-parent,
> >> > but affects only the graph traversal for
> >> > the set of commits being excluded.
> >> >
> >> >    -A-------E-HEAD
> >> >      \     /
> >> >       B-C-D
> >> >
> >> > In this example, the goal is to return the
> >> > set {B, C, D} which represents a working
> >> > branch that has been merged into main branch
> >> > E. `git rev-list D ^E` will end up returning
> >> > no commits since the exclude path eliminates
> >> > D and its ancestors.
> >> > `git rev-list --exclude-path-first-parent D ^E`
> >> > however will return {B, C, D} as desired.
> >>
> >> It is not clera why you want to have this, instead of doing a more
> >> obvious "D..E^".  Even better is "E^..E", which is often what you
> >> want when viewing a history like my 'seen' that is a straight-line
> >> into which tips of branches are merged.
> > My motivation is to find the point at which a release branch forked off from
> > a main branch, even though the release branch could have been merged
> > into the main branch multiple times since it was forked off.
> >
> > If we add another merge from release to main, it will be more clear
> > that those give different results:
> >
> >         -A-----E-F-main
> >           \   / /
> >            B-C-D-release
> >
> > `git rev-list --exclude-path-first-parent release ^main` returns {B, C, D}.
> > I've added commit F to show that we don't necessarily have info on E,
> > there could be many commits between it and the tip of main.
>
> OK, you meant to deal with repeated merges into integration branch.
>
> So the idea is to just name the end point merge, say F (you also
> could name D as the starting point, but see below), and
>
>  - initially mark its first parent as UNINTERESTING (i.e. E), and
>    other parents as INTERESTING (i.e. D).
>
>  - run the revision traversal machinery, but when propagating the
>    UNINTERESTING bit, give it only to the first parent.  The second
>    and later parents won't become UNINTERESTING.
>
>  - stop after we exhaust INTERESTING commits.
>
> It would probably work for your idealized topology, but I do not
> know what happens when there are criss-cross merges.  In the revised
> picture, you are merging down from the B-C-D chain into the
> mainline, but once the B-C-D chain becomes longer and diverges too
> much from the mainline, it becomes tempting to break the "merge only
> in one direction" discipline and merge back from the mainline, to
> "catch up", and such a merge will have the history of B-C-D line of
> development as its first parent.  Would that screw up the selection
> of which line of development is uninteresting?
Yeah this flag (as well as the --first-parent flag) is mainly only useful
because "git merge" will always put the "branch you're on" as parent 1
and the "branch being merged in" as parent 2. It is possible to break
this assumption with either commit-tree or by merging while on one
branch and pushing to another, but then the user should understand
the consequences of doing so. In our case this isn't possible because
a server handles all merges into the main branches.
>
> >> > Add the --exclude-path-first-parent flag,
> >> > which works similarly to --first-parent,
> >> > but affects only the graph traversal for
> >> > the set of commits being excluded.
> >> >
> >> >    -A-------E-HEAD
> >> >      \     /
> >> >       B-C-D
>
> In any case, it was totally unclear from the proposed log messsage,
> and the overlong option name that does not say much did not help me
> guess what you wanted to do with it.  Specifically, it is not clear
> what "exclude" means (we do not usually use the word in the context
Exclude appears in the first paragraph of the man for git rev-list:
"      List commits that are reachable by following the parent
       links from the given commit(s), but exclude commits that
       are reachable from the one(s) given with a ^ in front of
       them. The output is given in reverse chronological order
       by default."
It appears 5+ more times in the man page with the same meaning.
> of revision traversal), and when we talk about "path" in the context
> of revision traversal, we almost always mean the paths to the files,
> i.e. pathspec that limits and simplifies the shape of the history.
"path" is used in the same man page for the flag "--ancestry-path".
I agree that it could be ambiguous though, so perhaps "chain" would
be better.
> Also, it claims that it works similarly to --first-parent, but what
> you are doing is to propagate UNINTERESTING bit on the first-parent
> chain, which ends up showing the side branch (i.e. B-C-D chain),
> without showing the commits on the first-parent chain (A and E).
>
> What are the words that convey the idea behind this operation
> clearly at the conceptual level?  Let's think aloud to see if we can
> come up with a better name.
>
>  * first parents are unintertesting
>
>  * show commits on side branch(es)
>
>  * follow side branch.
>
> I think that is closer to the problem you are solving, if I
> understand what you wrote above correctly.
>
> Perhaps --show-side-branch or --follow-side-branch?  I dunno.
For my particular use-case I am using it in combination with
--first-parent and a single include and exclude commit to show the
commits on the "side-branch" of the include commit. But if you specify
multiple commits for either or don't use --first-parent, the behavior is
different and I don't think "--side-branch" describes it well in those cases.

Since I don't believe I can predict all use-cases for the flag,
I'd rather name it by what it "does" rather than what it is "for".
If we're concerned about length, maybe "first-parent-not" could
get the meaning across:
- for "rev-list --first-parent A --not B" only first parents are visited
along A's ancestry
- for "rev-list --first-parent-not A --not B" it might be reasonable
that since B is a "not" commit, only first parents are visited along
B's ancestry.

Overall I don't think we can make a name so clear that the user
can avoid the man page anyway.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V2] git-rev-list: add --first-parent-not flag
  2021-04-17  0:15 [PATCH] git-rev-list: add --exclude-path-first-parent flag Jerry Zhang
  2021-04-17  0:45 ` Junio C Hamano
@ 2021-04-21  0:48 ` Jerry Zhang
  1 sibling, 0 replies; 7+ messages in thread
From: Jerry Zhang @ 2021-04-21  0:48 UTC (permalink / raw)
  To: git, gitster; +Cc: ross, abe, brian.kubisiak, Jerry Zhang

Add the --path-first-parent-not flag, which
causes the traversal of any "not" commits
to visit only the first parent upon encountering
a merge commit.

   -A-----E-F-G--main
     \   / /
      B-C-D--topic

In this example, the goal is to return the
set {B, C, D} which represents a topic
branch that has been merged into main branch.
`git rev-list topic ^main` will end up returning
no commits since excluding main will end up
traversing the commits on topic as well.
`git rev-list --first-parent-not topic ^main`
however will return {B, C, D} as desired.

Add docs for the new flag, and clarify the
doc for --first-parent to indicate that it
applies to traversing the set of included
commits only. The semantics of existing flags
however have not changed.

Signed-off-by: Jerry Zhang <jerry@skydio.com>
---
V1->V2:
- Shortened flag name to --first-parent-not
- Updated example history in commit msg

 Documentation/rev-list-options.txt | 21 ++++++++++++++-------
 blame.c                            |  2 +-
 revision.c                         | 30 ++++++++++++++++++++----------
 revision.h                         |  3 ++-
 shallow.c                          |  2 +-
 t/t6012-rev-list-simplify.sh       | 18 ++++++++++++------
 6 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index b1c8f86c6e..6666f13b30 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -122,19 +122,26 @@ again.  Equivalent forms are `--min-parents=0` (any commit has 0 or more
 parents) and `--max-parents=-1` (negative numbers denote no upper limit).
 
 --first-parent::
-	Follow only the first parent commit upon seeing a merge
-	commit.  This option can give a better overview when
-	viewing the evolution of a particular topic branch,
-	because merges into a topic branch tend to be only about
-	adjusting to updated upstream from time to time, and
-	this option allows you to ignore the individual commits
-	brought in to your history by such a merge.
+	When finding commits to include, follow only the first
+	parent commit upon seeing a merge commit.  This option
+	can give a better overview when viewing the evolution of
+	a particular topic branch, because merges into a topic
+	branch tend to be only about adjusting to updated upstream
+	from time to time, and this option allows you to ignore
+	the individual commits brought in to your history by such
+	a merge.
 ifdef::git-log[]
 +
 This option also changes default diff format for merge commits
 to `first-parent`, see `--diff-merges=first-parent` for details.
 endif::git-log[]
 
+--first-parent-not::
+	When finding commits to exclude, follow only the first
+	parent commit upon seeing a merge commit.  This causes
+	"not" commits to exclude only commits on that branch itself
+	and not those brought in by a merge.
+
 --not::
 	Reverses the meaning of the '{caret}' prefix (or lack thereof)
 	for all following revision specifiers, up to the next `--not`.
diff --git a/blame.c b/blame.c
index 5018bb8fb2..8354dc0252 100644
--- a/blame.c
+++ b/blame.c
@@ -2615,7 +2615,7 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 		else {
 			commit->object.flags |= UNINTERESTING;
 			if (commit->object.parsed)
-				mark_parents_uninteresting(commit);
+				mark_parents_uninteresting(sb->revs, commit);
 		}
 		/* treat root commit as boundary */
 		if (!commit->parents && !sb->show_root)
diff --git a/revision.c b/revision.c
index 553c0faa9b..4b87b48235 100644
--- a/revision.c
+++ b/revision.c
@@ -268,7 +268,7 @@ static void commit_stack_clear(struct commit_stack *stack)
 	stack->nr = stack->alloc = 0;
 }
 
-static void mark_one_parent_uninteresting(struct commit *commit,
+static void mark_one_parent_uninteresting(struct rev_info *revs, struct commit *commit,
 					  struct commit_stack *pending)
 {
 	struct commit_list *l;
@@ -285,20 +285,26 @@ static void mark_one_parent_uninteresting(struct commit *commit,
 	 * wasn't uninteresting), in which case we need
 	 * to mark its parents recursively too..
 	 */
-	for (l = commit->parents; l; l = l->next)
+	for (l = commit->parents; l; l = l->next) {
 		commit_stack_push(pending, l->item);
+		if (revs && revs->first_parent_not)
+			break;
+	}
 }
 
-void mark_parents_uninteresting(struct commit *commit)
+void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit)
 {
 	struct commit_stack pending = COMMIT_STACK_INIT;
 	struct commit_list *l;
 
-	for (l = commit->parents; l; l = l->next)
-		mark_one_parent_uninteresting(l->item, &pending);
+	for (l = commit->parents; l; l = l->next) {
+		mark_one_parent_uninteresting(revs, l->item, &pending);
+		if (revs && revs->first_parent_not)
+			break;
+	}
 
 	while (pending.nr > 0)
-		mark_one_parent_uninteresting(commit_stack_pop(&pending),
+		mark_one_parent_uninteresting(revs, commit_stack_pop(&pending),
 					      &pending);
 
 	commit_stack_clear(&pending);
@@ -437,7 +443,7 @@ static struct commit *handle_commit(struct rev_info *revs,
 		if (repo_parse_commit(revs->repo, commit) < 0)
 			die("unable to parse commit %s", name);
 		if (flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
+			mark_parents_uninteresting(revs, commit);
 
 			if (!revs->topo_order || !generation_numbers_enabled(the_repository))
 				revs->limited = 1;
@@ -1120,7 +1126,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 			if (repo_parse_commit_gently(revs->repo, p, 1) < 0)
 				continue;
 			if (p->parents)
-				mark_parents_uninteresting(p);
+				mark_parents_uninteresting(revs, p);
 			if (p->object.flags & SEEN)
 				continue;
 			p->object.flags |= SEEN;
@@ -1128,6 +1134,8 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
+			if (revs->first_parent_not)
+				break;
 		}
 		return 0;
 	}
@@ -1418,7 +1426,7 @@ static int limit_list(struct rev_info *revs)
 		if (process_parents(revs, commit, &list, NULL) < 0)
 			return -1;
 		if (obj->flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
+			mark_parents_uninteresting(revs, commit);
 			slop = still_interesting(list, date, slop, &interesting_cache);
 			if (slop)
 				continue;
@@ -2214,6 +2222,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 		return argcount;
 	} else if (!strcmp(arg, "--first-parent")) {
 		revs->first_parent_only = 1;
+	} else if (!strcmp(arg, "--first-parent-not")) {
+		revs->first_parent_not = 1;
 	} else if (!strcmp(arg, "--ancestry-path")) {
 		revs->ancestry_path = 1;
 		revs->simplify_history = 0;
@@ -3341,7 +3351,7 @@ static void explore_walk_step(struct rev_info *revs)
 		return;
 
 	if (c->object.flags & UNINTERESTING)
-		mark_parents_uninteresting(c);
+		mark_parents_uninteresting(revs, c);
 
 	for (p = c->parents; p; p = p->next)
 		test_flag_and_insert(&info->explore_queue, p->item, TOPO_WALK_EXPLORED);
diff --git a/revision.h b/revision.h
index a24f72dcd1..82afbd0f8a 100644
--- a/revision.h
+++ b/revision.h
@@ -164,6 +164,7 @@ struct rev_info {
 			bisect:1,
 			ancestry_path:1,
 			first_parent_only:1,
+			first_parent_not:1,
 			line_level_traverse:1,
 			tree_blobs_in_commit_order:1,
 
@@ -405,7 +406,7 @@ const char *get_revision_mark(const struct rev_info *revs,
 void put_revision_mark(const struct rev_info *revs,
 		       const struct commit *commit);
 
-void mark_parents_uninteresting(struct commit *commit);
+void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit);
 void mark_tree_uninteresting(struct repository *r, struct tree *tree);
 void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
 
diff --git a/shallow.c b/shallow.c
index 9ed18eb884..71e5876f37 100644
--- a/shallow.c
+++ b/shallow.c
@@ -603,7 +603,7 @@ static int mark_uninteresting(const char *refname, const struct object_id *oid,
 	if (!commit)
 		return 0;
 	commit->object.flags |= UNINTERESTING;
-	mark_parents_uninteresting(commit);
+	mark_parents_uninteresting(NULL, commit);
 	return 0;
 }
 
diff --git a/t/t6012-rev-list-simplify.sh b/t/t6012-rev-list-simplify.sh
index 4f7fa8b6c0..7da8542e58 100755
--- a/t/t6012-rev-list-simplify.sh
+++ b/t/t6012-rev-list-simplify.sh
@@ -16,13 +16,12 @@ unnote () {
 }
 
 #
-# Create a test repo with interesting commit graph:
+# Create a test repo with an interesting commit graph:
 #
-# A--B----------G--H--I--K--L
-#  \  \           /     /
-#   \  \         /     /
-#    C------E---F     J
-#        \_/
+# A-----B-----G--H--I--K--L
+#  \     \      /     /
+#   \     \    /     /
+#    C--D--E--F     J
 #
 # The commits are laid out from left-to-right starting with
 # the root commit A and terminating at the tip commit L.
@@ -142,6 +141,13 @@ check_result 'I B A' --author-date-order -- file
 check_result 'H' --first-parent -- another-file
 check_result 'H' --first-parent --topo-order -- another-file
 
+check_result 'L K I H G B A' --first-parent L
+check_result 'F E D C' --first-parent-not F ^L
+check_result '' F ^L
+check_result 'L K I H G J' L ^F
+check_result 'L K I H G B J' --first-parent-not L ^F
+check_result 'L K I H G B' --first-parent-not --first-parent L ^F
+
 check_result 'E C B A' --full-history E -- lost
 test_expect_success 'full history simplification without parent' '
 	printf "%s\n" E C B A >expect &&
-- 
2.29.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-17  0:15 [PATCH] git-rev-list: add --exclude-path-first-parent flag Jerry Zhang
2021-04-17  0:45 ` Junio C Hamano
2021-04-17  1:07   ` Jerry Zhang
2021-04-17  4:09     ` Felipe Contreras
2021-04-17  7:22     ` Junio C Hamano
2021-04-21  0:16       ` Jerry Zhang
2021-04-21  0:48 ` [PATCH V2] git-rev-list: add --first-parent-not flag Jerry Zhang

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git