git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH] replace: add --graft option
@ 2014-05-18 18:29 Christian Couder
  2014-05-19  9:42 ` Michael Haggerty
  2014-05-19 11:21 ` Jeff King
  0 siblings, 2 replies; 8+ messages in thread
From: Christian Couder @ 2014-05-18 18:29 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jeff King

The usage string for this option is:

git replace [-f] --graft <commit> [<parent>...]

First we create a new commit that is the same as <commit>
except that its parents are [<parents>...]

Then we create a replace ref that replace <commit> with
the commit we just created.

With this new option, it should be straightforward to
convert grafts to replace refs, with something like:

cat .git/info/grafts | while read line
do git replace --graft $line; done

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 builtin/replace.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/builtin/replace.c b/builtin/replace.c
index 1bb491d..4b3705d 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -17,6 +17,7 @@
 static const char * const git_replace_usage[] = {
 	N_("git replace [-f] <object> <replacement>"),
 	N_("git replace [-f] --edit <object>"),
+	N_("git replace [-f] --graft <commit> [<parent>...]"),
 	N_("git replace -d <object>..."),
 	N_("git replace [--format=<format>] [-l [<pattern>]]"),
 	NULL
@@ -294,6 +295,71 @@ static int edit_and_replace(const char *object_ref, int force)
 	return replace_object_sha1(object_ref, old, "replacement", new, force);
 }
 
+static int read_sha1_commit(const unsigned char *sha1, struct strbuf *dst)
+{
+	void *buf;
+	enum object_type type;
+	unsigned long size;
+
+	buf = read_sha1_file(sha1, &type, &size);
+	if (!buf)
+		return error(_("cannot read object %s"), sha1_to_hex(sha1));
+	if (type != OBJ_COMMIT) {
+		free(buf);
+		return error(_("object %s is not a commit"), sha1_to_hex(sha1));
+	}
+	strbuf_attach(dst, buf, size, size + 1);
+	return 0;
+}
+
+static int create_graft(int argc, const char **argv, int force)
+{
+	unsigned char old[20], new[20];
+	const char *old_ref = argv[0];
+	struct strbuf buf = STRBUF_INIT;
+	struct strbuf new_parents = STRBUF_INIT;
+	const char *parent_start, *parent_end;
+	int i;
+
+	if (get_sha1(old_ref, old) < 0)
+		die(_("Not a valid object name: '%s'"), old_ref);
+	lookup_commit_or_die(old, old_ref);
+	if (read_sha1_commit(old, &buf))
+		die(_("Invalid commit: '%s'"), old_ref);
+
+	/* find existing parents */
+	parent_start = buf.buf;
+	parent_start += 46; /* "tree " + "hex sha1" + "\n" */
+	parent_end = parent_start;
+
+	while (starts_with(parent_end, "parent "))
+		parent_end += 48; /* "parent " + "hex sha1" + "\n" */
+
+	/* prepare new parents */
+	for (i = 1; i < argc; i++) {
+		unsigned char sha1[20];
+		if (get_sha1(argv[i], sha1) < 0)
+			die(_("Not a valid object name: '%s'"), argv[i]);
+		lookup_commit_or_die(sha1, argv[i]);
+		strbuf_addf(&new_parents, "parent %s\n", sha1_to_hex(sha1));
+	}
+
+	/* replace existing parents with new ones */
+	strbuf_splice(&buf, parent_start - buf.buf, parent_end - parent_start,
+		      new_parents.buf, new_parents.len);
+
+	if (write_sha1_file(buf.buf, buf.len, commit_type, new))
+		die(_("could not write replacement commit for: '%s'"), old_ref);
+
+	strbuf_release(&new_parents);
+	strbuf_release(&buf);
+
+	if (!hashcmp(old, new))
+		return error("new commit is the same as the old one: '%s'", sha1_to_hex(old));
+
+	return replace_object_sha1(old_ref, old, "replacement", new, force);
+}
+
 int cmd_replace(int argc, const char **argv, const char *prefix)
 {
 	int force = 0;
@@ -303,12 +369,14 @@ int cmd_replace(int argc, const char **argv, const char *prefix)
 		MODE_LIST,
 		MODE_DELETE,
 		MODE_EDIT,
+		MODE_GRAFT,
 		MODE_REPLACE
 	} cmdmode = MODE_UNSPECIFIED;
 	struct option options[] = {
 		OPT_CMDMODE('l', "list", &cmdmode, N_("list replace refs"), MODE_LIST),
 		OPT_CMDMODE('d', "delete", &cmdmode, N_("delete replace refs"), MODE_DELETE),
 		OPT_CMDMODE('e', "edit", &cmdmode, N_("edit existing object"), MODE_EDIT),
+		OPT_CMDMODE('g', "graft", &cmdmode, N_("change a commit's parents"), MODE_GRAFT),
 		OPT_BOOL('f', "force", &force, N_("replace the ref if it exists")),
 		OPT_STRING(0, "format", &format, N_("format"), N_("use this format")),
 		OPT_END()
@@ -325,7 +393,10 @@ int cmd_replace(int argc, const char **argv, const char *prefix)
 		usage_msg_opt("--format cannot be used when not listing",
 			      git_replace_usage, options);
 
-	if (force && cmdmode != MODE_REPLACE && cmdmode != MODE_EDIT)
+	if (force &&
+	    cmdmode != MODE_REPLACE &&
+	    cmdmode != MODE_EDIT &&
+	    cmdmode != MODE_GRAFT)
 		usage_msg_opt("-f only makes sense when writing a replacement",
 			      git_replace_usage, options);
 
@@ -348,6 +419,12 @@ int cmd_replace(int argc, const char **argv, const char *prefix)
 				      git_replace_usage, options);
 		return edit_and_replace(argv[0], force);
 
+	case MODE_GRAFT:
+		if (argc < 1)
+			usage_msg_opt("-g needs at least one argument",
+				      git_replace_usage, options);
+		return create_graft(argc, argv, force);
+
 	case MODE_LIST:
 		if (argc > 1)
 			usage_msg_opt("only one pattern can be given with -l",
-- 
1.9.rc0.17.g651113e

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-18 18:29 [RFC/PATCH] replace: add --graft option Christian Couder
@ 2014-05-19  9:42 ` Michael Haggerty
  2014-05-19 11:19   ` Jeff King
  2014-05-23  6:39   ` Christian Couder
  2014-05-19 11:21 ` Jeff King
  1 sibling, 2 replies; 8+ messages in thread
From: Michael Haggerty @ 2014-05-19  9:42 UTC (permalink / raw)
  To: Christian Couder, Junio C Hamano; +Cc: git, Jeff King

On 05/18/2014 08:29 PM, Christian Couder wrote:
> The usage string for this option is:
> 
> git replace [-f] --graft <commit> [<parent>...]
> 
> First we create a new commit that is the same as <commit>
> except that its parents are [<parents>...]
> 
> Then we create a replace ref that replace <commit> with
> the commit we just created.
> 
> With this new option, it should be straightforward to
> convert grafts to replace refs, with something like:
> 
> cat .git/info/grafts | while read line
> do git replace --graft $line; done

I love the functionality; I think it's a great step towards making
grafts obsolete.

I haven't worked with Git's object reading/writing code much, but it
surprised me that you are editing the commit object basically as a
string, using hard-coded length constants and stuff.  It seems
error-prone, and we already have a commit parser.

Would it be possible to program this at a higher layer of abstraction
based on the commit object produced by the existing commit parser?
E.g., edit the object it produces, and write the result?  Or create a
new commit object out of the parsed commit object and write that?

It's great that you're working on this!

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-19  9:42 ` Michael Haggerty
@ 2014-05-19 11:19   ` Jeff King
  2014-05-19 17:25     ` Junio C Hamano
  2014-05-23  6:39   ` Christian Couder
  1 sibling, 1 reply; 8+ messages in thread
From: Jeff King @ 2014-05-19 11:19 UTC (permalink / raw)
  To: Michael Haggerty; +Cc: Christian Couder, Junio C Hamano, git

On Mon, May 19, 2014 at 11:42:07AM +0200, Michael Haggerty wrote:

> On 05/18/2014 08:29 PM, Christian Couder wrote:
> > The usage string for this option is:
> > 
> > git replace [-f] --graft <commit> [<parent>...]
> > 
> > First we create a new commit that is the same as <commit>
> > except that its parents are [<parents>...]
> > 
> > Then we create a replace ref that replace <commit> with
> > the commit we just created.
> > 
> > With this new option, it should be straightforward to
> > convert grafts to replace refs, with something like:
> > 
> > cat .git/info/grafts | while read line
> > do git replace --graft $line; done
> 
> I love the functionality; I think it's a great step towards making
> grafts obsolete.

Me too.

> I haven't worked with Git's object reading/writing code much, but it
> surprised me that you are editing the commit object basically as a
> string, using hard-coded length constants and stuff.  It seems
> error-prone, and we already have a commit parser.

I think we have at least two commit parsers already. :)

The one in commit.c:parse_commit_buffer tries hard to be fast and only
get out enough information for a traversal (so parents, commit
timestamp, and tree pointer).

The ones in pretty.c that back format_commit_message
(parse_commit_header, parse_commit_message) are a bit more flexible, as
they record the offsets and lengths of certain key lines. But "parents"
is not one of those lines (we rely on the already-parsed copy in the
"struct commit" for parents and tree information).

It might make sense to just teach parse_commit_header to be a little
more thorough; it has to read past those lines anyway to find the author
and committer lines, so it would not be much more expensive to note
them.  And then of course the code needs to be pulled out of the
pretty-printer and made generally accessible.

While I do like the thought of having a decent reusable commit parser,
this particular case is really about trying to tweak one header, without
touching anything else. IOW, it is basically:

  sed '/^parent /d; /^tree /a\
  parent ...\
  parent ...
  '

That's more or less what Christian's function is doing, though it
assumes things like that the parents come immediately after the tree,
and that they are all in a group. Those are all true for objects created
by git, but I think we can be a little flexible.

It turns out I wrote a proof-of-concept of this in March when we had the
original discussion, but forgot to ever send it to the list. My parsing
function looked like:

	/*
	 * Rewrite the commit object found in "buf", removing any existing
	 * parents and adding lines for any parents in the NULL-terminated
	 * array "parents" (whose strings should be 40-char hex sha1s).
	 *
	 * The output is written to the strbuf "out".
	 */
	static void rewrite_parents(struct strbuf *out, const char *buf, const char **parents)
	{
		int added = 0;
	
		while (buf && *buf) {
			const char *eol = strchrnul(buf, '\n');
			const char *next = *eol ? eol + 1 : eol;
	
			if (eol == buf + 1)
				break;
	
			if (!starts_with(buf, "parent "))
				strbuf_add(out, buf, next - buf);
	
			/*
			 * We always put our parent lines right after the tree line,
			 * which matches how git creates commit objects.
			 */
			if (starts_with(buf, "tree ")) {
				const char **p;
	
				if (added++)
					die("commit has two tree lines?");
				for (p = parents; *p; p++)
					strbuf_addf(out, "parent %s\n", *p);
			}
	
			buf = next;
		}
	
		if (!added)
			die("commit has no tree line?");
	
		strbuf_addstr(out, buf);
	}

Rather than a splice, I did it as I copied into the strbuf, and I
treated each line independently (dropping parent lines, no matter where,
and then finding the tree line as the "proper" place to add new parent
lines).

-Peff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-18 18:29 [RFC/PATCH] replace: add --graft option Christian Couder
  2014-05-19  9:42 ` Michael Haggerty
@ 2014-05-19 11:21 ` Jeff King
  1 sibling, 0 replies; 8+ messages in thread
From: Jeff King @ 2014-05-19 11:21 UTC (permalink / raw)
  To: Christian Couder; +Cc: Junio C Hamano, git

On Sun, May 18, 2014 at 08:29:38PM +0200, Christian Couder wrote:

> +static int create_graft(int argc, const char **argv, int force)
> +{
> +	unsigned char old[20], new[20];
> +	const char *old_ref = argv[0];
> +	struct strbuf buf = STRBUF_INIT;
> +	struct strbuf new_parents = STRBUF_INIT;
> +	const char *parent_start, *parent_end;
> +	int i;
> +
> +	if (get_sha1(old_ref, old) < 0)
> +		die(_("Not a valid object name: '%s'"), old_ref);
> +	lookup_commit_or_die(old, old_ref);
> +	if (read_sha1_commit(old, &buf))
> +		die(_("Invalid commit: '%s'"), old_ref);

Do we want to peel to commits here? That is, should:

  git replace --graft v1.5.0 v1.4.0

work? On the one hand, I see it as friendly. On the other, it may be a
bit surprising when working with something as potentially dangerous a
replace refs. If we don't do it automatically, the user can still say
"v1.5.0^{commit}" to be explicit. I dunno; maybe I am being overly
paranoid.

> +	/* prepare new parents */
> +	for (i = 1; i < argc; i++) {
> +		unsigned char sha1[20];
> +		if (get_sha1(argv[i], sha1) < 0)
> +			die(_("Not a valid object name: '%s'"), argv[i]);
> +		lookup_commit_or_die(sha1, argv[i]);
> +		strbuf_addf(&new_parents, "parent %s\n", sha1_to_hex(sha1));
> +	}

Either way, I think _this_ peeling is a sane thing to do.

-Peff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-19 11:19   ` Jeff King
@ 2014-05-19 17:25     ` Junio C Hamano
  2014-05-19 17:35       ` Jeff King
  0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2014-05-19 17:25 UTC (permalink / raw)
  To: Jeff King; +Cc: Michael Haggerty, Christian Couder, git

Jeff King <peff@peff.net> writes:

> It might make sense to just teach parse_commit_header to be a little
> more thorough; it has to read past those lines anyway to find the author
> and committer lines, so it would not be much more expensive to note
> them.  And then of course the code needs to be pulled out of the
> pretty-printer and made generally accessible.

I notice that you said "might" above.

> That's more or less what Christian's function is doing, though it
> assumes things like that the parents come immediately after the tree,
> and that they are all in a group. Those are all true for objects created
> by git, but I think we can be a little flexible.

The headers up to committer are cast in stone in their ordering, and
I do not immediately see how loosening it would be beneficial.

Unless you are trying to give users a new way to record exactly the
same commit in twenty-four (or more) ways with their own object
names, that is ;-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-19 17:25     ` Junio C Hamano
@ 2014-05-19 17:35       ` Jeff King
  2014-05-19 18:34         ` Junio C Hamano
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff King @ 2014-05-19 17:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, Christian Couder, git

On Mon, May 19, 2014 at 10:25:10AM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > It might make sense to just teach parse_commit_header to be a little
> > more thorough; it has to read past those lines anyway to find the author
> > and committer lines, so it would not be much more expensive to note
> > them.  And then of course the code needs to be pulled out of the
> > pretty-printer and made generally accessible.
> 
> I notice that you said "might" above.

Yeah. I think having a reusable parser is definitely a good thing. I'm
not sure if it's worth the massive amount of refactoring that would be
required for this particular use case.

> > That's more or less what Christian's function is doing, though it
> > assumes things like that the parents come immediately after the tree,
> > and that they are all in a group. Those are all true for objects created
> > by git, but I think we can be a little flexible.
> 
> The headers up to committer are cast in stone in their ordering, and
> I do not immediately see how loosening it would be beneficial.
> 
> Unless you are trying to give users a new way to record exactly the
> same commit in twenty-four (or more) ways with their own object
> names, that is ;-)

Sorry, I didn't mean to imply that people can do what they want with
that ordering. Implementations that reorder the headers are stupid and
wrong, and should be fixed.

BUT. I really don't like making these assumptions or doing ad-hoc
parsing all through the code. We _do_ see quirky, wrong objects, and
want to handle them sanely and consistently. That's hard to do when
there are parsers sprinkled throughout the code which handle each case
slightly differently. I don't recall seeing this with header ordering,
but I know that broken ident lines have caused us headaches in the past,
and I'm happy that we've (mostly) settled on the code in
split_ident_line to handle this.

Things like:

  +       /* find existing parents */
  +       parent_start = buf.buf;
  +       parent_start += 46; /* "tree " + "hex sha1" + "\n" */
  +       parent_end = parent_start;

scare me. Is buf actually 46 bytes long? If not, we read past the end of
the buffer. What if it contains something besides "tree <sha1>\n" at the
beginning? We don't even notice!

The version I posted elsewhere in the thread at least operates on a
line-by-line basis, and tries to verify its assumptions (and barfs if
not). It's still doing its own parsing, but at least it's keeping its
assumptions on the object format to a minimum ("drop old parent lines",
"add new ones after tree, and barf if there's not exactly one tree
line").

-Peff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-19 17:35       ` Jeff King
@ 2014-05-19 18:34         ` Junio C Hamano
  0 siblings, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2014-05-19 18:34 UTC (permalink / raw)
  To: Jeff King; +Cc: Michael Haggerty, Christian Couder, git

Jeff King <peff@peff.net> writes:

> On Mon, May 19, 2014 at 10:25:10AM -0700, Junio C Hamano wrote:
>
>> The headers up to committer are cast in stone in their ordering, and
>> I do not immediately see how loosening it would be beneficial.
>> 
>> Unless you are trying to give users a new way to record exactly the
>> same commit in twenty-four (or more) ways with their own object
>> names, that is ;-)
>
> Sorry, I didn't mean to imply that people can do what they want with
> that ordering. Implementations that reorder the headers are stupid and
> wrong, and should be fixed.

Yeah, that was the only thing I meant to say, and what you said in
the rest of message makes sense to me.  I very much like the
approach to parse line-by-line, noticing products from stupid and
wrong implementations and warning or erroring out against them,
while allowing an option to be lenient to help users who want to fix
their repositories contaminated with such objects.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC/PATCH] replace: add --graft option
  2014-05-19  9:42 ` Michael Haggerty
  2014-05-19 11:19   ` Jeff King
@ 2014-05-23  6:39   ` Christian Couder
  1 sibling, 0 replies; 8+ messages in thread
From: Christian Couder @ 2014-05-23  6:39 UTC (permalink / raw)
  To: mhagger; +Cc: gitster, git, peff

From: Michael Haggerty <mhagger@alum.mit.edu>

> On 05/18/2014 08:29 PM, Christian Couder wrote:
>> The usage string for this option is:
>> 
>> git replace [-f] --graft <commit> [<parent>...]
>> 
>> First we create a new commit that is the same as <commit>
>> except that its parents are [<parents>...]
>> 
>> Then we create a replace ref that replace <commit> with
>> the commit we just created.
>> 
>> With this new option, it should be straightforward to
>> convert grafts to replace refs, with something like:
>> 
>> cat .git/info/grafts | while read line
>> do git replace --graft $line; done
> 
> I love the functionality; I think it's a great step towards making
> grafts obsolete.
> 
> I haven't worked with Git's object reading/writing code much, but it
> surprised me that you are editing the commit object basically as a
> string, using hard-coded length constants and stuff.  It seems
> error-prone, and we already have a commit parser.
> 
> Would it be possible to program this at a higher layer of abstraction
> based on the commit object produced by the existing commit parser?
> E.g., edit the object it produces, and write the result?  Or create a
> new commit object out of the parsed commit object and write that?

I tried to program this at a higher layer of abstraction first, but it
was not easy to properly write the new commit.

> It's great that you're working on this!

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-05-23  6:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-18 18:29 [RFC/PATCH] replace: add --graft option Christian Couder
2014-05-19  9:42 ` Michael Haggerty
2014-05-19 11:19   ` Jeff King
2014-05-19 17:25     ` Junio C Hamano
2014-05-19 17:35       ` Jeff King
2014-05-19 18:34         ` Junio C Hamano
2014-05-23  6:39   ` Christian Couder
2014-05-19 11:21 ` Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).