All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johan Herland <johan@herland.net>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johan@herland.net, Johannes.Schindelin@gmx.de,
	trast@student.ethz.ch, tavestbo@trolltech.com,
	git@drmicha.warpmail.net, chriscool@tuxfamily.org,
	spearce@spearce.org, sam@vilain.net
Subject: [RFC/PATCHv7 13/22] Refactor notes code to concatenate multiple notes annotating the same object
Date: Fri, 09 Oct 2009 12:22:09 +0200	[thread overview]
Message-ID: <1255083738-23263-15-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1255083738-23263-1-git-send-email-johan@herland.net>

Currently, having multiple notes referring to the same commit from various
locations in the notes tree is strongly discouraged, since only one of those
notes will be parsed and shown.

This patch teaches the notes code to _concatenate_ multiple notes that
annotate the same commit. Notes are concatenated by creating a new blob
object containing the concatenation of the notes in question, and
replacing them with the concatenated note in the internal notes tree
structure.

Getting the concatenation right requires being more proactive in unpacking
subtree entries in the internal notes tree structure, so that we don't return
a note prematurely (i.e. before having found all other notes that annotate
the same object). As such, this patch may incur a small performance penalty.

Suggested-by: Sam Vilain <sam@vilain.net>
Re-suggested-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
---
 notes.c |  243 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 161 insertions(+), 82 deletions(-)

diff --git a/notes.c b/notes.c
index 210c4b2..50a4672 100644
--- a/notes.c
+++ b/notes.c
@@ -59,115 +59,196 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 		unsigned int n);
 
 /*
- * To find a leaf_node:
+ * Search the tree until the appropriate location for the given key is found:
  * 1. Start at the root node, with n = 0
- * 2. Use the nth nibble of the key as an index into a:
- *    - If a[n] is an int_node, recurse into that node and increment n
- *    - If a leaf_node with matching key, return leaf_node (assert note entry)
+ * 2. If a[0] at the current level is a matching subtree entry, unpack that
+ *    subtree entry and remove it; restart search at the current level.
+ * 3. Use the nth nibble of the key as an index into a:
+ *    - If a[n] is an int_node, recurse from #2 into that node and increment n
  *    - If a matching subtree entry, unpack that subtree entry (and remove it);
  *      restart search at the current level.
- *    - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node.
- *      Backtrack out of the recursion, one level at a time and check a[0]:
- *      - If a[0] at the current level is a matching subtree entry, unpack that
- *        subtree entry (and remove it); restart search at the current level.
+ *    - Otherwise, we have found one of the following:
+ *      - a subtree entry which does not match the key
+ *      - a note entry which may or may not match the key
+ *      - an unused leaf node (NULL)
+ *      In any case, set *tree and *n, and return pointer to the tree location.
  */
-static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n,
-		const unsigned char *key_sha1)
+static void **note_tree_search(struct int_node **tree,
+		unsigned char *n, const unsigned char *key_sha1)
 {
 	struct leaf_node *l;
-	unsigned char i = GET_NIBBLE(n, key_sha1);
-	void *p = tree->a[i];
+	unsigned char i;
+	void *p = (*tree)->a[0];
 
+	if (GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE) {
+		l = (struct leaf_node *) CLR_PTR_TYPE(p);
+		if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) {
+			/* unpack tree and resume search */
+			(*tree)->a[0] = NULL;
+			load_subtree(l, *tree, *n);
+			free(l);
+			return note_tree_search(tree, n, key_sha1);
+		}
+	}
+
+	i = GET_NIBBLE(*n, key_sha1);
+	p = (*tree)->a[i];
 	switch(GET_PTR_TYPE(p)) {
 	case PTR_TYPE_INTERNAL:
-		l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1);
-		if (l)
-			return l;
-		break;
-	case PTR_TYPE_NOTE:
-		l = (struct leaf_node *) CLR_PTR_TYPE(p);
-		if (!hashcmp(key_sha1, l->key_sha1))
-			return l; /* return note object matching given key */
-		break;
+		*tree = CLR_PTR_TYPE(p);
+		(*n)++;
+		return note_tree_search(tree, n, key_sha1);
 	case PTR_TYPE_SUBTREE:
 		l = (struct leaf_node *) CLR_PTR_TYPE(p);
 		if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) {
 			/* unpack tree and resume search */
-			tree->a[i] = NULL;
-			load_subtree(l, tree, n);
+			(*tree)->a[i] = NULL;
+			load_subtree(l, *tree, *n);
 			free(l);
-			return note_tree_find(tree, n, key_sha1);
+			return note_tree_search(tree, n, key_sha1);
 		}
-		break;
-	case PTR_TYPE_NULL:
+		/* fall through */
 	default:
-		assert(!p);
-		break;
+		return &((*tree)->a[i]);
 	}
+}
 
-	/*
-	 * Did not find key at this (or any lower) level.
-	 * Check if there's a matching subtree entry in tree->a[0].
-	 * If so, unpack tree and resume search.
-	 */
-	p = tree->a[0];
-	if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE)
-		return NULL;
-	l = (struct leaf_node *) CLR_PTR_TYPE(p);
-	if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) {
-		/* unpack tree and resume search */
-		tree->a[0] = NULL;
-		load_subtree(l, tree, n);
-		free(l);
-		return note_tree_find(tree, n, key_sha1);
+/*
+ * To find a leaf_node:
+ * Search to the tree location appropriate for the given key:
+ * If a note entry with matching key, return the note entry, else return NULL.
+ */
+static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n,
+		const unsigned char *key_sha1)
+{
+	void **p = note_tree_search(&tree, &n, key_sha1);
+	if (GET_PTR_TYPE(*p) == PTR_TYPE_NOTE) {
+		struct leaf_node *l = (struct leaf_node *) CLR_PTR_TYPE(*p);
+		if (!hashcmp(key_sha1, l->key_sha1))
+			return l;
 	}
 	return NULL;
 }
 
+/* Create a new blob object by concatenating the two given blob objects */
+static int concatenate_notes(unsigned char *cur_sha1,
+		const unsigned char *new_sha1)
+{
+	char *cur_msg, *new_msg, *buf;
+	unsigned long cur_len, new_len, buf_len;
+	enum object_type cur_type, new_type;
+	int ret;
+
+	/* read in both note blob objects */
+	new_msg = read_sha1_file(new_sha1, &new_type, &new_len);
+	if (!new_msg || !new_len || new_type != OBJ_BLOB) {
+		free(new_msg);
+		return 0;
+	}
+	cur_msg = read_sha1_file(cur_sha1, &cur_type, &cur_len);
+	if (!cur_msg || !cur_len || cur_type != OBJ_BLOB) {
+		free(cur_msg);
+		free(new_msg);
+		hashcpy(cur_sha1, new_sha1);
+		return 0;
+	}
+
+	/* we will separate the notes by a newline anyway */
+	if (cur_msg[cur_len - 1] == '\n')
+		cur_len--;
+
+	/* concatenate cur_msg and new_msg into buf */
+	buf_len = cur_len + 1 + new_len;
+	buf = (char *) xmalloc(buf_len);
+	memcpy(buf, cur_msg, cur_len);
+	buf[cur_len] = '\n';
+	memcpy(buf + cur_len + 1, new_msg, new_len);
+
+	free(cur_msg);
+	free(new_msg);
+
+	/* create a new blob object from buf */
+	ret = write_sha1_file(buf, buf_len, "blob", cur_sha1);
+	free(buf);
+	return ret;
+}
+
 /*
  * To insert a leaf_node:
- * 1. Start at the root node, with n = 0
- * 2. Use the nth nibble of the key as an index into a:
- *    - If a[n] is NULL, store the tweaked pointer directly into a[n]
- *    - If a[n] is an int_node, recurse into that node and increment n
- *    - If a[n] is a leaf_node:
- *      1. Check if they're equal, and handle that (abort? overwrite?)
- *      2. Create a new int_node, and store both leaf_nodes there
- *      3. Store the new int_node into a[n].
+ * Search to the tree location appropriate for the given leaf_node's key:
+ * - If location is unused (NULL), store the tweaked pointer directly there
+ * - If location holds a note entry that matches the note-to-be-inserted, then
+ *   concatenate the two notes.
+ * - If location holds a note entry that matches the subtree-to-be-inserted,
+ *   then unpack the subtree-to-be-inserted into the location.
+ * - If location holds a matching subtree entry, unpack the subtree at that
+ *   location, and restart the insert operation from that level.
+ * - Else, create a new int_node, holding both the node-at-location and the
+ *   node-to-be-inserted, and store the new int_node into the location.
  */
-static int note_tree_insert(struct int_node *tree, unsigned char n,
-		const struct leaf_node *entry, unsigned char type)
+static void note_tree_insert(struct int_node *tree, unsigned char n,
+		struct leaf_node *entry, unsigned char type)
 {
 	struct int_node *new_node;
-	const struct leaf_node *l;
-	int ret;
-	unsigned char i = GET_NIBBLE(n, entry->key_sha1);
-	void *p = tree->a[i];
-	assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL);
-	switch(GET_PTR_TYPE(p)) {
+	struct leaf_node *l;
+	void **p = note_tree_search(&tree, &n, entry->key_sha1);
+
+	assert(GET_PTR_TYPE(entry) == 0); /* no type bits set */
+	l = (struct leaf_node *) CLR_PTR_TYPE(*p);
+	switch(GET_PTR_TYPE(*p)) {
 	case PTR_TYPE_NULL:
-		assert(!p);
-		tree->a[i] = SET_PTR_TYPE(entry, type);
-		return 0;
-	case PTR_TYPE_INTERNAL:
-		return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type);
-	default:
-		assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE ||
-			GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE);
-		l = (const struct leaf_node *) CLR_PTR_TYPE(p);
-		if (!hashcmp(entry->key_sha1, l->key_sha1))
-			return -1; /* abort insert on matching key */
-		new_node = (struct int_node *)
-			xcalloc(sizeof(struct int_node), 1);
-		ret = note_tree_insert(new_node, n + 1,
-			CLR_PTR_TYPE(p), GET_PTR_TYPE(p));
-		if (ret) {
-			free(new_node);
-			return -1;
+		assert(!*p);
+		*p = SET_PTR_TYPE(entry, type);
+		return;
+	case PTR_TYPE_NOTE:
+		switch (type) {
+		case PTR_TYPE_NOTE:
+			if (!hashcmp(l->key_sha1, entry->key_sha1)) {
+				/* skip concatenation if l == entry */
+				if (!hashcmp(l->val_sha1, entry->val_sha1))
+					return;
+
+				if (concatenate_notes(l->val_sha1,
+						entry->val_sha1))
+					die("failed to concatenate note %s "
+					    "into note %s for commit %s",
+					    sha1_to_hex(entry->val_sha1),
+					    sha1_to_hex(l->val_sha1),
+					    sha1_to_hex(l->key_sha1));
+				free(entry);
+				return;
+			}
+			break;
+		case PTR_TYPE_SUBTREE:
+			if (!SUBTREE_SHA1_PREFIXCMP(l->key_sha1,
+						    entry->key_sha1)) {
+				/* unpack 'entry' */
+				load_subtree(entry, tree, n);
+				free(entry);
+				return;
+			}
+			break;
+		}
+		break;
+	case PTR_TYPE_SUBTREE:
+		if (!SUBTREE_SHA1_PREFIXCMP(entry->key_sha1, l->key_sha1)) {
+			/* unpack 'l' and restart insert */
+			*p = NULL;
+			load_subtree(l, tree, n);
+			free(l);
+			note_tree_insert(tree, n, entry, type);
+			return;
 		}
-		tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL);
-		return note_tree_insert(new_node, n + 1, entry, type);
+		break;
 	}
+
+	/* non-matching leaf_node */
+	assert(GET_PTR_TYPE(*p) == PTR_TYPE_NOTE ||
+	       GET_PTR_TYPE(*p) == PTR_TYPE_SUBTREE);
+	new_node = (struct int_node *) xcalloc(sizeof(struct int_node), 1);
+	note_tree_insert(new_node, n + 1, l, GET_PTR_TYPE(*p));
+	*p = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL);
+	note_tree_insert(new_node, n + 1, entry, type);
 }
 
 /* Free the entire notes data contained in the given tree */
@@ -220,7 +301,6 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 {
 	unsigned char commit_sha1[20];
 	unsigned int prefix_len;
-	int status;
 	void *buf;
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -254,8 +334,7 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 				l->key_sha1[19] = (unsigned char) len;
 				type = PTR_TYPE_SUBTREE;
 			}
-			status = note_tree_insert(node, n, l, type);
-			assert(!status);
+			note_tree_insert(node, n, l, type);
 		}
 	}
 	free(buf);
-- 
1.6.4.304.g1365c.dirty

  parent reply	other threads:[~2009-10-09 10:31 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-09 10:21 [RFC/PATCHv7 00/22] git notes Johan Herland
2009-10-09 10:21 ` Johan Herland
2009-10-09 10:32   ` Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 01/22] Introduce commit notes Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 02/22] Add a script to edit/inspect notes Johan Herland
2009-10-09 10:21 ` [RFC/PATCHv7 03/22] Speed up git notes lookup Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 04/22] Add an expensive test for git-notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 05/22] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 06/22] fast-import: Add support for importing commit notes Johan Herland
2011-01-31 18:33   ` [RFC] fast-import: notemodify (N) command Jonathan Nieder
2011-01-31 18:48     ` [Vcs-fast-import-devs] " Sverre Rabbelier
2011-01-31 19:01       ` Jonathan Nieder
2011-01-31 21:19         ` Sverre Rabbelier
2011-01-31 22:37           ` Sam Vilain
2011-02-01  0:13             ` Sverre Rabbelier
2009-10-09 10:22 ` [RFC/PATCHv7 07/22] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 08/22] Add flags to get_commit_notes() to control the format of the note string Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 09/22] Add '%N'-format for pretty-printing commit notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 10/22] Teach notes code to free its internal data structures on request Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 11/22] Teach the notes lookup code to parse notes trees with various fanout schemes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 12/22] Add selftests verifying that we can parse notes trees with various fanouts Johan Herland
2009-10-09 10:22 ` Johan Herland [this message]
2009-10-09 10:22 ` [RFC/PATCHv7 14/22] Add selftests verifying concatenation of multiple notes for the same commit Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 15/22] Notes API: get_commit_notes() -> format_note() + remove the commit restriction Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 16/22] Notes API: init_notes(): Initialize the notes tree from the given notes ref Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 17/22] Notes API: add_note(): Add note objects to the internal notes tree structure Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 18/22] Notes API: get_note(): Return the note annotating the given object Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 19/22] Notes API: for_each_note(): Traverse the entire notes tree with a callback Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 20/22] Notes API: Allow multiple concurrent notes trees with new struct notes_tree Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 21/22] Refactor notes concatenation into a flexible interface for combining notes Johan Herland
2009-10-09 10:22 ` [RFC/PATCHv7 22/22] fast-import: Proper notes tree manipulation using the notes API Johan Herland
2009-10-09 14:25   ` Shawn O. Pearce
2009-11-20  1:43     ` Johan Herland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1255083738-23263-15-git-send-email-johan@herland.net \
    --to=johan@herland.net \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=chriscool@tuxfamily.org \
    --cc=git@drmicha.warpmail.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sam@vilain.net \
    --cc=spearce@spearce.org \
    --cc=tavestbo@trolltech.com \
    --cc=trast@student.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.