git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH 1/3] apply: Allow blank context lines to match beyond EOF
@ 2010-02-17  7:03 Björn Gustavsson
  2010-02-17  8:14 ` Junio C Hamano
  0 siblings, 1 reply; 3+ messages in thread
From: Björn Gustavsson @ 2010-02-17  7:03 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

"git apply --whitespace=fix" will not always succeed when used
on a series of patches in the following circumstances:

* One patch adds a blank line at the end of a file. (Since
  --whitespace=fix is used, the blank line will *not* be added.)

* The next patch adds non-blanks lines after the blank line
  introduced in the first patch. That patch will not apply
  because the blank line that is expected to be found at end
  of the file is no longer there.

Fix this problem by allowing a blank context line at the beginning
of a hunk to match if parts of it falls beyond end of the file
(i.e. at least one context line must match an existing line in
the file).

TODO: We should probably require that at least one *non-blank*
context line should fall within the boundaries of the file.

TODO: Since this commit touches an important code path in git,
we probably want to add more test cases.

TODO: It could also be useful to handle files that shrink with
blanks lines at the end.

Signed-off-by: Björn Gustavsson <bgustavsson@gmail.com>
---
 builtin-apply.c |  127 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 2a1004d..75c04f0 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1854,18 +1854,55 @@ static int match_fragment(struct image *img,
 {
 	int i;
 	char *fixed_buf, *buf, *orig, *target;
+	int limit;
+	int preimage_limit;
 
-	if (preimage->nr + try_lno > img->nr)
+	/*
+	 * Should we remove blanks line at the end of the file?
+	 *
+	 * If yes, we will allow blank lines in the preimage to
+	 * match non-existing lines beyond the last existing line
+	 * in the file, provided that at least one line falls
+	 * within the boundaries of the file.
+	 */
+	if (match_end && ws_error_action == correct_ws_error &&
+	    ws_rule & WS_BLANK_AT_EOF &&
+	    try_lno < img->nr) {
+		/*
+		 * This hunk must match the end of the file (img) or
+		 * beyond. Set up limit and preimage_limit so that
+		 * the early rejection tests that follow will
+		 * allow the preimage to have lines that extend beyond
+		 * the end of the file. The quick hash test will
+		 * only compare the lines in the preimage that 
+		 * fall within the boundaries of img.
+		 */
+		limit = try_lno + preimage->nr;
+		preimage_limit = img->nr - try_lno;
+	} else {
+		/*
+		 * Not the last hunk or not removing blanks lined
+		 * the end of the file.
+		 *
+		 * Set up the variables so that any hunk that
+		 * fall beyound the end of the file will be
+		 * quickly rejected.
+		 */
+		limit = img->nr;
+		preimage_limit = preimage->nr;
+	}
+
+	if (preimage->nr + try_lno > limit)
 		return 0;
 
 	if (match_beginning && try_lno)
 		return 0;
 
-	if (match_end && preimage->nr + try_lno != img->nr)
+	if (match_end && preimage->nr + try_lno != limit)
 		return 0;
 
 	/* Quick hash check */
-	for (i = 0; i < preimage->nr; i++)
+	for (i = 0; i < preimage_limit; i++)
 		if (preimage->line[i].hash != img->line[try_lno + i].hash)
 			return 0;
 
@@ -1875,12 +1912,17 @@ static int match_fragment(struct image *img,
 	 * otherwise try+fragsize must be still within the preimage,
 	 * and either case, the old piece should match the preimage
 	 * exactly.
+	 *
+	 * We can only have an exact match if the preimage does not
+	 * extend beyond the end of the file.
 	 */
-	if ((match_end
-	     ? (try + preimage->len == img->len)
-	     : (try + preimage->len <= img->len)) &&
-	    !memcmp(img->buf + try, preimage->buf, preimage->len))
-		return 1;
+	if (preimage_limit == preimage->nr) {
+		if ((match_end
+		     ? (try + preimage->len == img->len)
+		     : (try + preimage->len <= img->len)) &&
+		    !memcmp(img->buf + try, preimage->buf, preimage->len))
+			return 1;
+	}
 
 	/*
 	 * No exact match. If we are ignoring whitespace, run a line-by-line
@@ -1932,12 +1974,16 @@ static int match_fragment(struct image *img,
 	 * it might with whitespace fuzz. We haven't been asked to
 	 * ignore whitespace, we were asked to correct whitespace
 	 * errors, so let's try matching after whitespace correction.
+	 *
+	 * The preimage may extend beyond the end of the file,
+	 * but in this loop we will only handle the part of the
+	 * preimage that falls within the file.
 	 */
 	fixed_buf = xmalloc(preimage->len + 1);
 	buf = fixed_buf;
 	orig = preimage->buf;
 	target = img->buf + try;
-	for (i = 0; i < preimage->nr; i++) {
+	for (i = 0; i < preimage_limit; i++) {
 		size_t fixlen; /* length after fixing the preimage */
 		size_t oldlen = preimage->line[i].len;
 		size_t tgtlen = img->line[try_lno + i].len;
@@ -1977,6 +2023,29 @@ static int match_fragment(struct image *img,
 		target += tgtlen;
 	}
 
+
+	/*
+	 * Now handle the lines in the preimage that falls beyond the
+	 * end of the file (if any). They will only match if they are
+	 * empty or only contain whitespace (if WS_BLANK_AT_EOL is
+	 * false).
+	 */
+	for ( ; i < preimage->nr; i++) {
+		size_t fixlen; /* length after fixing the preimage */
+		size_t oldlen = preimage->line[i].len;
+		int j;
+
+		/* Try fixing the line in the preimage */
+		fixlen = ws_fix_copy(buf, orig, oldlen, ws_rule, NULL);
+
+		for (j = 0; j < fixlen; j++)
+			if (!isspace(buf[j]))
+				goto unmatch_exit;
+
+		orig += oldlen;
+		buf += fixlen;
+	}
+
 	/*
 	 * Yes, the preimage is based on an older version that still
 	 * has whitespace breakages unfixed, and fixing them makes the
@@ -2002,11 +2071,17 @@ static int find_pos(struct image *img,
 	unsigned long backwards, forwards, try;
 	int backwards_lno, forwards_lno, try_lno;
 
-	if (preimage->nr > img->nr)
-		return -1;
+	/*
+	 * There used to be a quick reject here in case preimage
+	 * had more lines than img. We must let match_fragment()
+	 * handle that case because a hunk is now allowed to
+	 * extend beyond the end of img when --whitespace=fix
+	 * has been given (and core.whitespace.blanks-at-eof is
+	 * enabled).
+	 */
 
 	/*
-	 * If match_begining or match_end is specified, there is no
+	 * If match_beginning or match_end is specified, there is no
 	 * point starting from a wrong line that will never match and
 	 * wander around and wait for a match at the specified end.
 	 */
@@ -2091,12 +2166,26 @@ static void update_image(struct image *img,
 	int i, nr;
 	size_t remove_count, insert_count, applied_at = 0;
 	char *result;
+	int preimage_limit;
+
+	/*
+	 * If we are removing blank lines at the end of img,
+	 * the preimage may extend beyond the end.
+	 * If that is the case, we must be careful only to
+	 * remove the part of the preimage that falls within
+	 * the boundaries of img. Initialize preimage_limit
+	 * to the number of lines in the preimage that falls
+	 * within the boundaries.
+	 */
+	preimage_limit = preimage->nr;
+	if (preimage_limit > img->nr - applied_pos)
+		preimage_limit = img->nr - applied_pos;
 
 	for (i = 0; i < applied_pos; i++)
 		applied_at += img->line[i].len;
 
 	remove_count = 0;
-	for (i = 0; i < preimage->nr; i++)
+	for (i = 0; i < preimage_limit; i++)
 		remove_count += img->line[applied_pos + i].len;
 	insert_count = postimage->len;
 
@@ -2113,8 +2202,8 @@ static void update_image(struct image *img,
 	result[img->len] = '\0';
 
 	/* Adjust the line table */
-	nr = img->nr + postimage->nr - preimage->nr;
-	if (preimage->nr < postimage->nr) {
+	nr = img->nr + postimage->nr - preimage_limit;
+	if (preimage_limit < postimage->nr) {
 		/*
 		 * NOTE: this knows that we never call remove_first_line()
 		 * on anything other than pre/post image.
@@ -2122,10 +2211,10 @@ static void update_image(struct image *img,
 		img->line = xrealloc(img->line, nr * sizeof(*img->line));
 		img->line_allocated = img->line;
 	}
-	if (preimage->nr != postimage->nr)
+	if (preimage_limit != postimage->nr)
 		memmove(img->line + applied_pos + postimage->nr,
-			img->line + applied_pos + preimage->nr,
-			(img->nr - (applied_pos + preimage->nr)) *
+			img->line + applied_pos + preimage_limit,
+			(img->nr - (applied_pos + preimage_limit)) *
 			sizeof(*img->line));
 	memcpy(img->line + applied_pos,
 	       postimage->line,
@@ -2321,7 +2410,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
 
 	if (applied_pos >= 0) {
 		if (new_blank_lines_at_end &&
-		    preimage.nr + applied_pos == img->nr &&
+		    preimage.nr + applied_pos >= img->nr &&
 		    (ws_rule & WS_BLANK_AT_EOF) &&
 		    ws_error_action != nowarn_ws_error) {
 			record_ws_error(WS_BLANK_AT_EOF, "+", 1, frag->linenr);
-- 
1.7.0

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [RFC/PATCH 1/3] apply: Allow blank context lines to match beyond EOF
  2010-02-17  7:03 [RFC/PATCH 1/3] apply: Allow blank context lines to match beyond EOF Björn Gustavsson
@ 2010-02-17  8:14 ` Junio C Hamano
  2010-02-18  8:45   ` Björn Gustavsson
  0 siblings, 1 reply; 3+ messages in thread
From: Junio C Hamano @ 2010-02-17  8:14 UTC (permalink / raw)
  To: Björn Gustavsson; +Cc: git

Björn Gustavsson <bgustavsson@gmail.com> writes:

> "git apply --whitespace=fix" will not always succeed when used
> on a series of patches in the following circumstances:
>
> * One patch adds a blank line at the end of a file. (Since
>   --whitespace=fix is used, the blank line will *not* be added.)
>
> * The next patch adds non-blanks lines after the blank line
>   introduced in the first patch. That patch will not apply
>   because the blank line that is expected to be found at end
>   of the file is no longer there.
>
> Fix this problem by allowing a blank context line at the beginning
> of a hunk to match if parts of it falls beyond end of the file
> (i.e. at least one context line must match an existing line in
> the file).
>
> TODO: We should probably require that at least one *non-blank*
> context line should fall within the boundaries of the file.

I think that is very sensible; I thought about this after I wrote the
review message in the previous round but failed to mention it.  Happy to
see that you are thinking along the same line.

> @@ -2002,11 +2071,17 @@ static int find_pos(struct image *img,
>  	unsigned long backwards, forwards, try;
>  	int backwards_lno, forwards_lno, try_lno;
>  
> -	if (preimage->nr > img->nr)
> -		return -1;
> +	/*
> +	 * There used to be a quick reject here in case preimage
> +	 * had more lines than img. We must let match_fragment()
> +	 * handle that case because a hunk is now allowed to
> +	 * extend beyond the end of img when --whitespace=fix
> +	 * has been given (and core.whitespace.blanks-at-eof is
> +	 * enabled).
> +	 */

Is it worth to keep the quick-reject if we are not running under
blank-at-eof mode?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC/PATCH 1/3] apply: Allow blank context lines to match beyond  EOF
  2010-02-17  8:14 ` Junio C Hamano
@ 2010-02-18  8:45   ` Björn Gustavsson
  0 siblings, 0 replies; 3+ messages in thread
From: Björn Gustavsson @ 2010-02-18  8:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

2010/2/17 Junio C Hamano <gitster@pobox.com>:

>> @@ -2002,11 +2071,17 @@ static int find_pos(struct image *img,
>>       unsigned long backwards, forwards, try;
>>       int backwards_lno, forwards_lno, try_lno;
>>
>> -     if (preimage->nr > img->nr)
>> -             return -1;
>> +     /*
>> +      * There used to be a quick reject here in case preimage
>> +      * had more lines than img. We must let match_fragment()
>> +      * handle that case because a hunk is now allowed to
>> +      * extend beyond the end of img when --whitespace=fix
>> +      * has been given (and core.whitespace.blanks-at-eof is
>> +      * enabled).
>> +      */
>
> Is it worth to keep the quick-reject if we are not running under
> blank-at-eof mode?

Good point.

As far as I can understand, the quick reject could only make
a difference if there is a huge preimage applied to a big file
and it will only make "git apply" reject the patch faster.

So I created a text file containing one million lines. I deleted
about 60% of the lines and generated a diff.

Applying that diff on the file where the lines had already
been deleted (which would be the same as trying to
apply the patch twice on the original file), "git apply"
without my branch (standard 1.7.0) needed 0.076s
to reject the patch. With my branch (i.e. without the quick
reject), "git apply" rejected the patch in 0.087s.

So unless there is some other real-world use case I haven't
thought of, it does not seem worthwhile to keep
the quick rejection test for performance reasons.

I think I'll factor out the removal of the quick reject
into a separate commit in my next revision of the patch
series, including information from this email in the
commit message.

Another thing is whether the rejection test is actually
needed for correctness reasons. As far I understand
it, is is not not. There is one test that should be changed,
though, to make it clearer why it works. I intend to include
one of the following changes in my next revision of the patch
series.

Either this one:

diff --git a/builtin-apply.c b/builtin-apply.c
index 75c04f0..d58c1ea 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -2090,7 +2090,11 @@ static int find_pos(struct image *img,
        else if (match_end)
                line = img->nr - preimage->nr;

-       if (line > img->nr)
+       /*
+        * Because the comparison is unsigned, the following test
+        * will also take care of a negative line number.
+        */
+       if ((size_t) line > img->nr)
                line = img->nr;

        try = 0;

Or this one:

diff --git a/builtin-apply.c b/builtin-apply.c
index 75c04f0..8ca0e32 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -2090,7 +2090,9 @@ static int find_pos(struct image *img,
        else if (match_end)
                line = img->nr - preimage->nr;

-       if (line > img->nr)
+       if (line < 0)
+               line = 0;
+       else if (line > img->nr)
                line = img->nr;

        try = 0;

-- 
Björn Gustavsson, Erlang/OTP, Ericsson AB

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-02-18  8:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-17  7:03 [RFC/PATCH 1/3] apply: Allow blank context lines to match beyond EOF Björn Gustavsson
2010-02-17  8:14 ` Junio C Hamano
2010-02-18  8:45   ` Björn Gustavsson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).