All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephan Beyer <s-beyer@gmx.net>
To: git@vger.kernel.org
Cc: Stephan Beyer <s-beyer@gmx.net>,
	Christian Couder <chriscool@tuxfamily.org>
Subject: [PATCH 14/16] bisect: use a modified breadth-first search to find relevant weights
Date: Fri, 26 Feb 2016 03:04:40 +0100	[thread overview]
Message-ID: <1456452282-10325-15-git-send-email-s-beyer@gmx.net> (raw)
In-Reply-To: <1456452282-10325-1-git-send-email-s-beyer@gmx.net>

The idea is to reverse the DAG and perform a modified breadth-first search
on it, starting on all sources of the reversed DAG.
Before each visit of a commit, its weight is induced.
This only works for non-merge commits, so the BFS stops prematurely on
merge commits (that are collected in a list).
Merge commits from that collection are considered for further visits
as soon as all their parents have been visited.
Their weights are computed using compute_weight().
Each BFS walk ends when the computed weight is falling or halfway.

Signed-off-by: Stephan Beyer <s-beyer@gmx.net>
---

In other words: walk the history up, compute the weight of each
commit and stop when you know a better commit cannot follow.

This way, the expensive "compute_weight()" function is not called
on merge commits that will not help to find the maximum distance.

I have to mention that I also had a more complex idea, involving
a binary search on specifically ordered merge commits, and then
inducing the weights of the commits between those merge commits.
However, this is pretty nontrivial, error-prone by definition,
and the compute_weight() calls during the binary search can be more
expensive than during the "bottom-up" BFS walk.
So this is clearly the better idea. (And also follows the KISS rule.)

For the ads: ~3-4 seconds

 bisect.c | 246 ++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 158 insertions(+), 88 deletions(-)

diff --git a/bisect.c b/bisect.c
index 1d1f61c..827a82a 100644
--- a/bisect.c
+++ b/bisect.c
@@ -30,6 +30,9 @@ static unsigned marker;
 struct node_data {
 	int weight;
 	unsigned marked;
+	unsigned parents;
+	unsigned visited : 1;
+	struct commit_list *children;
 };
 
 #define DEBUG_BISECT 0
@@ -110,16 +113,6 @@ static int count_interesting_parents(struct commit *commit)
 	return count;
 }
 
-static inline int halfway(struct commit *commit)
-{
-	/*
-	 * Don't short-cut something we are not going to return!
-	 */
-	if (commit->object.flags & TREESAME)
-		return 0;
-	return !distance_direction(commit);
-}
-
 #if !DEBUG_BISECT
 #define show_list(a,b,c) do { ; } while (0)
 #else
@@ -340,90 +333,167 @@ static void compute_all_weights(struct commit_list *list,
 	show_list("bisection 2 counted all", counted, list);
 }
 
-/* At the moment this is basically the same as compute_all_weights()
- * but with a halfway shortcut */
+static struct commit_list *build_reversed_dag(struct commit_list *list,
+					      struct node_data *nodes)
+{
+	struct commit_list *sources = NULL;
+	struct commit_list *p;
+	int n = 0;
+	for (p = list; p; p = p->next)
+		p->item->util = &nodes[n++];
+	for (p = list; p; p = p->next) {
+		struct commit_list *parent;
+		struct commit *commit = p->item;
+		for (parent = commit->parents; parent; parent = parent->next) {
+			if (!(parent->item->object.flags & UNINTERESTING)) {
+				struct commit_list **next = &node_data(parent->item)->children;
+				commit_list_insert(commit, next);
+				node_data(commit)->parents++;
+			}
+		}
+	}
+
+	/* find all sources */
+	for (p = list; p; p = p->next) {
+		if (node_data(p->item)->parents == 0)
+			commit_list_insert(p->item, &sources);
+	}
+
+	return sources;
+}
+
+static void commit_list_insert_unique(struct commit *item,
+				      struct commit_list **list)
+{
+	if (!*list || item < (*list)->item) /* empty list or item will be first */
+		commit_list_insert(item, list);
+	else if (item != (*list)->item) { /* item will not be first or not inserted */
+		struct commit_list *p = *list;
+		for (; p->next && p->next->item < item; p = p->next);
+		if (!p->next || item != p->next->item) /* not already inserted */
+			commit_list_insert(item, &p->next);
+	}
+}
+
+/* do a BFS on the reversed DAG (starting from commits in queue), stop at merge commits */
+static void bfs_up_to_merges(struct commit_list *queue,
+			     struct commit_list **merges)
+{
+	assert(queue);
+	struct commit_list **last;
+	for (last = &queue->next; *last; last = &(*last)->next);
+
+	while (queue) {
+		struct commit *top = queue->item;
+		struct commit_list *p;
+
+		if (distance_direction(top) > 0) {
+			node_data(top)->visited = 1;
+
+			/* queue children */
+			for (p = node_data(top)->children; p; p = p->next) {
+				if (node_data(p->item)->parents > 1) /* child is a merge */
+					commit_list_insert_unique(p->item, merges);
+				else {
+					node_data(p->item)->weight = node_data(top)->weight;
+					if (!(p->item->object.flags & TREESAME))
+						node_data(p->item)->weight++;
+					last = commit_list_append(p->item, last);
+				}
+			}
+		}
+
+		pop_commit(&queue);
+	}
+}
+
+static int all_parents_are_visited(struct commit *merge)
+{
+	struct commit_list *p;
+	for (p = merge->parents; p; p = p->next) {
+		if (p->item->util && !node_data(p->item)->visited)
+			return 0;
+	}
+	return 1;
+}
+
+static struct commit *extract_merge_to_queue(struct commit_list **merges)
+{
+	assert(merges);
+
+	struct commit_list *p, *q;
+	struct commit *found;
+
+	/* find a merge that is ready, i.e. all parents have been computed */
+	for (q = NULL, p = *merges; p && !all_parents_are_visited(p->item);
+	     q = p, p = p->next);
+	if (!p)
+		return NULL;
+
+	/* remove that commit from the list and return it */
+	if (q) {
+		assert(q->next == p);
+		q->next = p->next;
+	} else /* found first element of list */
+		*merges = p->next;
+	found = p->item;
+	free(p);
+
+	return found;
+}
+
+static int find_new_queue_from_merges(struct commit_list **queue,
+				      struct commit_list **merges)
+{
+	if (*merges) {
+		struct commit *merge = extract_merge_to_queue(merges);
+		*queue = NULL;
+		if (merge) {
+			commit_list_insert(merge, queue);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void compute_merge_weights(struct commit_list *merges)
+{
+	struct commit_list *p;
+	for (p = merges; p; p = p->next)
+		compute_weight(p->item);
+}
+
+static void modified_bfs(struct commit_list *queue)
+{
+	struct commit_list *merges = NULL;
+	bfs_up_to_merges(queue, &merges);
+	while (find_new_queue_from_merges(&queue, &merges)) {
+		compute_merge_weights(queue);
+		bfs_up_to_merges(queue, &merges);
+	}
+}
+
+/* The idea is to reverse the DAG and perform a modified breadth-first search
+ * on it, starting on all sources of the reversed DAG.
+ * Before each visit of a commit, its weight is induced.
+ * This only works for non-merge commits, so the BFS stops prematurely on
+ * merge commits (that are collected in a list).
+ * Merge commits from that collection are considered for further visits
+ * as soon as all parents have been visited.
+ * Their weights are computed using compute_weight().
+ * Each BFS branch ends when the computed weight is falling or halfway.
+ */
 static void compute_relevant_weights(struct commit_list *list,
 				     struct node_data *weights)
 {
-	int n, counted;
 	struct commit_list *p;
+	struct commit_list *sources = build_reversed_dag(list, weights);
 
-	counted = 0;
+	for (p = sources; p; p = p->next)
+		node_data(p->item)->weight = 1;
+	modified_bfs(sources);
 
-	for (n = 0, p = list; p; p = p->next) {
-		struct commit *commit = p->item;
-		unsigned flags = commit->object.flags;
-
-		commit->util = &weights[n++];
-		switch (count_interesting_parents(commit)) {
-		case 0:
-			if (!(flags & TREESAME)) {
-				node_data(commit)->weight = 1;
-				counted++;
-				show_list("bisection 2 count one",
-					  counted, list);
-			}
-			break;
-		case 1:
-			node_data(commit)->weight = -1;
-			break;
-		default:
-			node_data(commit)->weight = -2;
-			break;
-		}
-	}
-
-	show_list("bisection 2 initialize", counted, list);
-
-	for (p = list; p; p = p->next) {
-		if (!(p->item->object.flags & UNINTERESTING)
-		 && (node_data(p->item)->weight == -2)) {
-			compute_weight(p->item);
-
-			/* Does it happen to be at exactly half-way? */
-			if (halfway(p->item))
-				return;
-			counted++;
-		}
-	}
-
-	show_list("bisection 2 compute_weight", counted, list);
-
-	while (counted < total) {
-		for (p = list; p; p = p->next) {
-			struct commit_list *q;
-			unsigned flags = p->item->object.flags;
-
-			if (0 <= node_data(p->item)->weight)
-				continue;
-			for (q = p->item->parents; q; q = q->next) {
-				if (q->item->object.flags & UNINTERESTING)
-					continue;
-				if (0 <= node_data(q->item)->weight)
-					break;
-			}
-			if (!q)
-				continue;
-
-			/*
-			 * weight for p is unknown but q is known.
-			 * add one for p itself if p is to be counted,
-			 * otherwise inherit it from q directly.
-			 */
-			node_data(p->item)->weight = node_data(q->item)->weight;
-			if (!(flags & TREESAME)) {
-				node_data(p->item)->weight++;
-				counted++;
-				show_list("bisection 2 count one",
-					  counted, list);
-			}
-
-			/* Does it happen to be at exactly half-way? */
-			if (halfway(p->item))
-				return;
-		}
-	}
-	show_list("bisection 2 counted all", counted, list);
+	show_list("bisection 3 result", total, list);
 }
 
 struct commit_list *find_bisection(struct commit_list *list,
-- 
2.7.1.354.gd492730.dirty

  parent reply	other threads:[~2016-02-26  2:06 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-26  2:04 [PATCH 00/16] git bisect improvements Stephan Beyer
2016-02-26  2:04 ` [PATCH 01/16] bisect: write about `bisect next` in documentation Stephan Beyer
2016-02-26  8:02   ` Jacob Keller
2016-02-26 18:47   ` Junio C Hamano
2016-02-27 13:45     ` Stephan Beyer
2016-02-27 18:03       ` Junio C Hamano
2016-02-27 19:38         ` Stephan Beyer
2016-02-28 18:28           ` Junio C Hamano
2016-02-26  2:04 ` [PATCH 02/16] bisect: add test for the bisect algorithm Stephan Beyer
2016-02-26  6:53   ` Christian Couder
2016-02-26 21:38     ` Stephan Beyer
2016-02-27 11:40       ` Christian Couder
2016-02-27 12:42         ` Matthieu Moy
2016-02-26  2:04 ` [PATCH 03/16] bisect: make bisect compile if DEBUG_BISECT is set Stephan Beyer
2016-02-26  2:04 ` [PATCH 04/16] bisect: make algorithm behavior independent of DEBUG_BISECT Stephan Beyer
2016-02-26  2:04 ` [PATCH 05/16] bisect: get rid of recursion in count_distance() Stephan Beyer
2016-02-26  2:04 ` [PATCH 06/16] bisect: use struct node_data array instead of int array Stephan Beyer
2016-02-26  2:04 ` [PATCH 07/16] bisect: replace clear_distance() by unique markers Stephan Beyer
2016-02-26  2:04 ` [PATCH 08/16] bisect: use commit instead of commit list as arguments when appropriate Stephan Beyer
2016-02-26  3:10   ` Junio C Hamano
2016-02-26  2:04 ` [PATCH 09/16] bisect: extract get_distance() function from code duplication Stephan Beyer
2016-02-26  2:04 ` [PATCH 10/16] bisect: introduce distance_direction() Stephan Beyer
2016-02-26  2:04 ` [PATCH 11/16] bisect: make total number of commits global Stephan Beyer
2016-02-26  2:04 ` [PATCH 12/16] bisect: rename count_distance() to compute_weight() Stephan Beyer
2016-02-26  2:04 ` [PATCH 13/16] bisect: prepare for different algorithms based on find_all Stephan Beyer
2016-02-26  2:04 ` Stephan Beyer [this message]
2016-02-26  3:09   ` [PATCH 14/16] bisect: use a modified breadth-first search to find relevant weights Junio C Hamano
2016-02-26 20:55     ` Stephan Beyer
2016-02-26  2:04 ` [PATCH 15/16] bisect: compute best bisection in compute_relevant_weights() Stephan Beyer
2016-02-26  2:04 ` [PATCH 16/16] bisect: get back halfway shortcut Stephan Beyer
2016-03-20 18:50 ` [PATCH 00/16] git bisect improvements Pranit Bauva
2016-03-21 22:22   ` Stephan Beyer
2016-03-22  7:35     ` Christian Couder
2016-03-22 11:35     ` Pranit Bauva

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1456452282-10325-15-git-send-email-s-beyer@gmx.net \
    --to=s-beyer@gmx.net \
    --cc=chriscool@tuxfamily.org \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.