All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
@ 2007-02-17 17:39 Johannes Schindelin
  2007-02-17 18:24 ` Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Johannes Schindelin @ 2007-02-17 17:39 UTC (permalink / raw)
  To: git, Mike Coleman, junkio


If you want to know which revisions contained a certain version
of a file, just say

	git name-rev --file <filename>

which will read the file, and give you a list of revisions
containing a file with the same contents. If <filename> is "-",
it will read the contents from stdin. Of course, this is a really
expensive operation.

This feature was suggested by Mike Coleman.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin-name-rev.c |  118 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 117 insertions(+), 1 deletions(-)

diff --git a/builtin-name-rev.c b/builtin-name-rev.c
index 89ea95d..f08b065 100644
--- a/builtin-name-rev.c
+++ b/builtin-name-rev.c
@@ -3,10 +3,95 @@
 #include "commit.h"
 #include "tag.h"
 #include "refs.h"
+#include "tree-walk.h"
+#include "object-hash.h"
 #include <regex.h>
 
 static const char name_rev_usage[] =
-	"git-name-rev [--tags | --ref-filter=<regexp>] ( --all | --stdin | committish [committish...] )\n";
+	"git-name-rev [--tags | --ref-filter=<regexp>] ( --all | --stdin | committish [committish...] | --file <filename> )\n";
+
+static unsigned char *file_sha1;
+static struct object_hash file_trees = { 0, 0, NULL };
+static struct object_array file_commits = { 0, 0, NULL };
+#define CONTAINS_FILE (1u<<10)
+
+static int get_file(const char *path)
+{
+	int fd, ret;
+	struct stat st;
+
+	file_sha1 = xmalloc(20);
+	if (!strcmp(path, "-"))
+		ret = index_pipe(file_sha1, 0, "blob", 0);
+	else {
+		if ((fd = open(path, O_RDONLY)) < 0 || fstat(fd, &st) < 0)
+			return -1;
+		ret = index_fd(file_sha1, fd, &st, 0, "blob");
+	}
+	if (ret)
+		return ret;
+	if (!parse_object(file_sha1))
+		return error("Object not found for '%s'", path);
+	return 0;
+}
+
+struct object_name {
+	int len;
+	char *name;
+};
+
+static struct object_name *name_file(struct tree *tree)
+{
+	static struct object_name null_name = { 0, NULL };
+	struct object_name *name;
+	struct tree_desc desc;
+	struct name_entry entry;
+
+	if (!tree->object.parsed)
+		parse_tree(tree);
+	else if ((name = lookup_object_in_hash(&tree->object, &file_trees)))
+		return name->len ? name : NULL;
+
+	if (!tree->buffer) {
+		add_object_to_hash(&tree->object, &null_name, &file_trees);
+		return NULL;
+	}
+
+	desc.buf = tree->buffer;
+	desc.size = tree->size;
+
+	while (tree_entry(&desc, &entry))
+		if (!hashcmp(file_sha1, entry.sha1)) {
+			name = xcalloc(sizeof(struct object_name), 1);
+			name->len = entry.pathlen;
+			name->name = xstrdup(entry.path);
+			break;
+		} else if (S_ISDIR(entry.mode)) {
+			struct object *subtree = parse_object(entry.sha1);
+			struct object_name *subname;
+
+			/* just to be safe */
+			if (subtree->type != OBJ_TREE)
+				die("%s is not a tree?",
+						sha1_to_hex(entry.sha1));
+
+			subname = name_file((struct tree *)subtree);
+			if (!subname)
+				continue;
+			name = xcalloc(sizeof(struct object_name), 1);
+			name->len = entry.pathlen + 1 + subname->len;
+			name->name = xmalloc(name->len + 1);
+			memcpy(name->name, entry.path, entry.pathlen);
+			name->name[entry.pathlen] = '/';
+			strncpy(name->name + entry.pathlen + 1,
+					subname->name, subname->len);
+			break;
+		}
+	add_object_to_hash(&tree->object, name ? name : &null_name,
+			&file_trees);
+
+	return name;
+}
 
 typedef struct rev_name {
 	const char *tip_name;
@@ -23,6 +108,7 @@ static void name_rev(struct commit *commit,
 	struct rev_name *name = (struct rev_name *)commit->util;
 	struct commit_list *parents;
 	int parent_number = 1;
+	struct object_name *file_name;
 
 	if (!commit->object.parsed)
 		parse_commit(commit);
@@ -54,6 +140,12 @@ copy_data:
 	} else
 		return;
 
+	if (file_sha1 && !(commit->object.flags & CONTAINS_FILE) &&
+			(file_name = name_file(commit->tree))) {
+		commit->object.flags |= CONTAINS_FILE;
+		add_object_array(&commit->object, NULL, &file_commits);
+	}
+
 	for (parents = commit->parents;
 			parents;
 			parents = parents->next, parent_number++) {
@@ -175,6 +267,14 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 				transform_stdin = 1;
 				cutoff = 0;
 				continue;
+			} else if (!strcmp(*argv, "--file")) {
+				if (argc != 2)
+					usage(name_rev_usage);
+				if (get_file(argv[1]))
+					die("Could not read '%s'", argv[1]);
+				cutoff = 0;
+				argc = 1;
+				continue;
 			}
 			usage(name_rev_usage);
 		}
@@ -202,6 +302,22 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 
 	for_each_ref(name_ref, &data);
 
+	if (file_sha1) {
+		int i;
+		for (i = 0; i < file_commits.nr; i++) {
+			struct commit *commit =
+				(struct commit *)file_commits.objects[i].item;
+			struct rev_name *rev_name = commit->util;
+			struct object_name *obj_name = name_file(commit->tree);
+
+			printf("%s", rev_name->tip_name);
+			if (rev_name->generation)
+				printf("^%d", rev_name->generation);
+			printf(":%s\n", obj_name->name);
+		}
+		return 0;
+	}
+
 	if (transform_stdin) {
 		char buffer[2048];
 		char *p, *p_start;
-- 
1.5.0.2139.gdafc9-dirty

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 17:39 [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob Johannes Schindelin
@ 2007-02-17 18:24 ` Junio C Hamano
  2007-02-17 23:52   ` Johannes Schindelin
  2007-02-18  0:04   ` Johannes Schindelin
  2007-02-17 18:30 ` Junio C Hamano
  2007-02-18  8:34 ` Junio C Hamano
  2 siblings, 2 replies; 9+ messages in thread
From: Junio C Hamano @ 2007-02-17 18:24 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Mike Coleman, junkio

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> If you want to know which revisions contained a certain version
> of a file, just say
>
> 	git name-rev --file <filename>
>
> which will read the file, and give you a list of revisions
> containing a file with the same contents. If <filename> is "-",
> it will read the contents from stdin. Of course, this is a really
> expensive operation.

I expected this to take arbitrary object name and let the caller
to do 'hash-object', so that you could also find a certain tree,
not just blob.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 17:39 [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob Johannes Schindelin
  2007-02-17 18:24 ` Junio C Hamano
@ 2007-02-17 18:30 ` Junio C Hamano
  2007-02-17 23:31   ` Johannes Schindelin
  2007-02-18  8:34 ` Junio C Hamano
  2 siblings, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2007-02-17 18:30 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Mike Coleman, junkio

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> +static struct object_hash file_trees = { 0, 0, NULL };
> +static struct object_array file_commits = { 0, 0, NULL };

These don't look good for two reasons: (1) you could leave BSS
to do the 0 initialization; (2) you need to change this if you
need to change the shape of "struct object_hash" later.

> +#define CONTAINS_FILE (1u<<10)

I am partly at fault, but I think we should have a consolidated
bit assignment policy in place before introducing new users of
object flags.  Some older code says in their comments that
revision.h reserves lower 8 bits while others say 16.  I offhand
know who is correct X-<.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 18:30 ` Junio C Hamano
@ 2007-02-17 23:31   ` Johannes Schindelin
  0 siblings, 0 replies; 9+ messages in thread
From: Johannes Schindelin @ 2007-02-17 23:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Mike Coleman

Hi,

On Sat, 17 Feb 2007, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > +static struct object_hash file_trees = { 0, 0, NULL };
> > +static struct object_array file_commits = { 0, 0, NULL };
> 
> These don't look good for two reasons: (1) you could leave BSS
> to do the 0 initialization; (2) you need to change this if you
> need to change the shape of "struct object_hash" later.

Yes, you are right on both accounts. Can you please just ammend the 
commit?

> > +#define CONTAINS_FILE (1u<<10)
> 
> I am partly at fault, but I think we should have a consolidated
> bit assignment policy in place before introducing new users of
> object flags.  Some older code says in their comments that
> revision.h reserves lower 8 bits while others say 16.  I offhand
> know who is correct X-<.

Something like this? (I did not know where to put the comment, so I let it 
be...)

--
[PATCH] The lower 16 bits of the object flags are reserved for rev_walk

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>

---

 builtin-blame.c    |    4 ++--
 builtin-describe.c |    2 +-
 builtin-reflog.c   |    4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/builtin-blame.c b/builtin-blame.c
index 7a5665f..8323dbf 100644
--- a/builtin-blame.c
+++ b/builtin-blame.c
@@ -66,8 +66,8 @@ static unsigned blame_copy_score;
 #define BLAME_DEFAULT_COPY_SCORE	40
 
 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
-#define METAINFO_SHOWN		(1u<<12)
-#define MORE_THAN_ONE_PATH	(1u<<13)
+#define METAINFO_SHOWN		(1u<<16)
+#define MORE_THAN_ONE_PATH	(1u<<17)
 
 /*
  * One blob in a commit that is being suspected
diff --git a/builtin-describe.c b/builtin-describe.c
index bcc6456..07a96a4 100644
--- a/builtin-describe.c
+++ b/builtin-describe.c
@@ -4,7 +4,7 @@
 #include "refs.h"
 #include "builtin.h"
 
-#define SEEN		(1u<<0)
+#define SEEN		(1u<<16)
 #define MAX_TAGS	(FLAG_BITS - 1)
 
 static const char describe_usage[] =
diff --git a/builtin-reflog.c b/builtin-reflog.c
index 3415551..3aa6902 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -34,8 +34,8 @@ struct expire_reflog_cb {
 	struct cmd_reflog_expire_cb *cmd;
 };
 
-#define INCOMPLETE	(1u<<10)
-#define STUDYING	(1u<<11)
+#define INCOMPLETE	(1u<<16)
+#define STUDYING	(1u<<17)
 
 static int tree_is_complete(const unsigned char *sha1)
 {

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 18:24 ` Junio C Hamano
@ 2007-02-17 23:52   ` Johannes Schindelin
  2007-02-18  0:04   ` Johannes Schindelin
  1 sibling, 0 replies; 9+ messages in thread
From: Johannes Schindelin @ 2007-02-17 23:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Mike Coleman

Hi,

On Sat, 17 Feb 2007, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > If you want to know which revisions contained a certain version
> > of a file, just say
> >
> > 	git name-rev --file <filename>
> >
> > which will read the file, and give you a list of revisions
> > containing a file with the same contents. If <filename> is "-",
> > it will read the contents from stdin. Of course, this is a really
> > expensive operation.
> 
> I expected this to take arbitrary object name and let the caller
> to do 'hash-object', so that you could also find a certain tree,
> not just blob.

That was easier than I feared. Here's a patch on top of 2/2:

--
[PATCH] name-rev --file: also accept SHA1 of blobs and trees

This allows you to say

	git name-rev --file master:t/

to see in which revisions the whole directory "t" is identical to what it 
is in "master".

The first hunk is a bugfix for a potential segv.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin-name-rev.c |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/builtin-name-rev.c b/builtin-name-rev.c
index f08b065..e4addc7 100644
--- a/builtin-name-rev.c
+++ b/builtin-name-rev.c
@@ -43,7 +43,7 @@ struct object_name {
 static struct object_name *name_file(struct tree *tree)
 {
 	static struct object_name null_name = { 0, NULL };
-	struct object_name *name;
+	struct object_name *name = NULL;
 	struct tree_desc desc;
 	struct name_entry entry;
 
@@ -141,7 +141,8 @@ copy_data:
 		return;
 
 	if (file_sha1 && !(commit->object.flags & CONTAINS_FILE) &&
-			(file_name = name_file(commit->tree))) {
+			((file_name = name_file(commit->tree)) ||
+			 !hashcmp(commit->tree->object.sha1, file_sha1))) {
 		commit->object.flags |= CONTAINS_FILE;
 		add_object_array(&commit->object, NULL, &file_commits);
 	}
@@ -270,7 +271,8 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 			} else if (!strcmp(*argv, "--file")) {
 				if (argc != 2)
 					usage(name_rev_usage);
-				if (get_file(argv[1]))
+				if (get_file(argv[1]) &&
+						get_sha1(argv[1], file_sha1))
 					die("Could not read '%s'", argv[1]);
 				cutoff = 0;
 				argc = 1;
@@ -313,7 +315,7 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 			printf("%s", rev_name->tip_name);
 			if (rev_name->generation)
 				printf("^%d", rev_name->generation);
-			printf(":%s\n", obj_name->name);
+			printf(":%s\n", obj_name ? obj_name->name : "");
 		}
 		return 0;
 	}
-- 
1.5.0.2139.gdafc9-dirty

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 18:24 ` Junio C Hamano
  2007-02-17 23:52   ` Johannes Schindelin
@ 2007-02-18  0:04   ` Johannes Schindelin
  1 sibling, 0 replies; 9+ messages in thread
From: Johannes Schindelin @ 2007-02-18  0:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Mike Coleman

Hi,

brown paper bag, please

 builtin-name-rev.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/builtin-name-rev.c b/builtin-name-rev.c
index e4addc7..e79ef1c 100644
--- a/builtin-name-rev.c
+++ b/builtin-name-rev.c
@@ -314,7 +314,7 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 
 			printf("%s", rev_name->tip_name);
 			if (rev_name->generation)
-				printf("^%d", rev_name->generation);
+				printf("~%d", rev_name->generation);
 			printf(":%s\n", obj_name ? obj_name->name : "");
 		}
 		return 0;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-17 17:39 [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob Johannes Schindelin
  2007-02-17 18:24 ` Junio C Hamano
  2007-02-17 18:30 ` Junio C Hamano
@ 2007-02-18  8:34 ` Junio C Hamano
  2007-02-18  8:48   ` Junio C Hamano
  2007-02-18 15:41   ` Johannes Schindelin
  2 siblings, 2 replies; 9+ messages in thread
From: Junio C Hamano @ 2007-02-18  8:34 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Mike Coleman, junkio

Having said what I said in the comments on [PATCH 1/2], I think
you are either solving a wrong problem, or even if you are
solving the right problem, you are solving in a wrong way in
this patch.

I do not think Mike wants a way to deal with what he gets in his
mailbox from an end user who did this:

	$ cat a-file-whose-name-was-not-told-to-mike >junk
	$ mailx mike.coleman <junk

It's more like he learns the contents _and_ the path from an
end-user who is having trouble with a tarball of unknown
vintage, and wants to identify which snapshot had that contents
at that path [*1*].

So I do not think it is solving the right problem to run around
all over the tree without knowing the path, looking for a
particular object name.  It certainly makes the solution much
more expensive.

Even if that were Mike's problem, there is a problem that the
same object can appear at different places in different trees.
Keeping a single pathname registered in object_hash would mean
the answer is "whatever happens to be found first", regardless
of the path.

To make "whatever happens to be found first" semantics useful, I
think you would need to make the traversal more controllable by
the user of name-rev.  For example, Mike may know what he gave
the end user was from maintenance series of 1.4.4 software, in
which case you would want to let him say "traverse from the tip
of 1.4.4 maintenance branch down, do not worry about other
branches such as 1.5.0 series or 1.4.3 maintenance series".

Your earlier --ref-filter patch would certainly help it, but a
solution based on grep over "rev-list | ls-tree --stdin" (which
currently does not exist) might be more appropriate and
flexible?  I dunno.

[Footnote]

*1* To find a revision that had a particular object at a known
path, you should be able to do:

	git rev-list <starting refs> |
        git diff-tree --stdin --raw -r -Z |
	grep -z that-object-name

but an earlier commit abd4e222 needs to be further enhanced to
add -Z option that means "output fields are not NUL terminated,
output records are", which did not happen.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-18  8:34 ` Junio C Hamano
@ 2007-02-18  8:48   ` Junio C Hamano
  2007-02-18 15:41   ` Johannes Schindelin
  1 sibling, 0 replies; 9+ messages in thread
From: Junio C Hamano @ 2007-02-18  8:48 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Mike Coleman, junkio

Junio C Hamano <junkio@cox.net> writes:

> [Footnote]
>
> *1* To find a revision that had a particular object at a known
> path, you should be able to do:
>
> 	git rev-list <starting refs> |
>       git diff-tree --stdin --raw -r -Z |
> 	grep -z that-object-name

Sheesh, please add "-- <path>" after "-Z".

>
> but an earlier commit abd4e222 needs to be further enhanced to
> add -Z option that means "output fields are not NUL terminated,
> output records are", which did not happen.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob
  2007-02-18  8:34 ` Junio C Hamano
  2007-02-18  8:48   ` Junio C Hamano
@ 2007-02-18 15:41   ` Johannes Schindelin
  1 sibling, 0 replies; 9+ messages in thread
From: Johannes Schindelin @ 2007-02-18 15:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Mike Coleman

Hi,

On Sun, 18 Feb 2007, Junio C Hamano wrote:

> It's more like he learns the contents _and_ the path from an end-user 
> who is having trouble with a tarball of unknown vintage, and wants to 
> identify which snapshot had that contents at that path [*1*].

Ah! Then it would make more sense to make it a new revision-walk filter?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2007-02-18 15:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-17 17:39 [PATCH 2/2] Teach name-rev to identify revisions containing a certain blob Johannes Schindelin
2007-02-17 18:24 ` Junio C Hamano
2007-02-17 23:52   ` Johannes Schindelin
2007-02-18  0:04   ` Johannes Schindelin
2007-02-17 18:30 ` Junio C Hamano
2007-02-17 23:31   ` Johannes Schindelin
2007-02-18  8:34 ` Junio C Hamano
2007-02-18  8:48   ` Junio C Hamano
2007-02-18 15:41   ` Johannes Schindelin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.