All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: Junio C Hamano <gitster@pobox.com>
Cc: Johannes Sixt <j6t@kdbg.org>,
	git@vger.kernel.org, Greg Brockman <gdb@mit.edu>,
	Ilari Liusvaara <ilari.liusvaara@elisanet.fi>,
	Elijah Newren <newren@gmail.com>,
	Andreas Schwab <schwab@linux-m68k.org>
Subject: [PATCH 3/3] apply: handle traditional patches with space in filename
Date: Wed, 18 Aug 2010 20:50:14 -0500	[thread overview]
Message-ID: <20100819015014.GC18922@burratino> (raw)
In-Reply-To: <20100819014516.GA7175@burratino>

To discover filenames from the --- and +++ lines in a traditional
unified diff, currently "git apply" scans forward for a whitespace
character on each line and stops there.  It can't use the whole line
because "diff -u" likes to include timestamps, like so:

 --- foo	2000-07-12 16:56:50.020000414 -0500
 +++ bar	2010-07-12 16:56:50.020000414 -0500

The whitespace-seeking heuristic works great, even when the tab
has been converted to spaces by some email + copy-and-paste
related corruption.

Except for one problem: if the filename itself contains whitespace,
the inferred filename will be too short.

When Giuseppe ran into this problem, it was for a file creation
patch (for debian/licenses/LICENSE.global BSD-style Chromium).
So one can't use the list of files present in the index to deduce an
appropriate filename (not to mention that way lies madness; see
v0.99~402, 2005-05-31).

Instead, look for a timestamp and use that if present to mark the end
of the filename.  If no timestamp is present, the old heuristic is
used, with one exception: the space character \040 is not considered
terminating whitespace any more unless it is followed by a timestamp.

Reported-by: Giuseppe Iuculano <iuculano@debian.org>
Acked-by: Guido Günther <agx@sigxcpu.org>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Thanks for the helpful advice and patience at my use of it.

 builtin/apply.c                  |  193 +++++++++++++++++++++++++++++++++++---
 t/t4135-apply-weird-filenames.sh |    4 +-
 2 files changed, 181 insertions(+), 16 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index efc109e..bd2fcb3 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value)
 	return squash_slash(strbuf_detach(&name, NULL));
 }
 
-static char *find_name(const char *line, char *def, int p_value, int terminate)
+static size_t tz_len(const char *line, size_t len)
+{
+	const char *tz, *p;
+
+	if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ')
+		return 0;
+	tz = line + len - strlen(" +0500");
+
+	if (tz[1] != '+' && tz[1] != '-')
+		return 0;
+
+	for (p = tz + 2; p != line + len; p++)
+		if (!isdigit(*p))
+			return 0;
+
+	return line + len - tz;
+}
+
+static size_t date_len(const char *line, size_t len)
+{
+	const char *date, *p;
+
+	if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-')
+		return 0;
+	p = date = line + len - strlen("72-02-05");
+
+	if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
+		return 0;
+
+	if (date - line >= strlen("19") &&
+	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
+		date -= strlen("19");
+
+	return line + len - date;
+}
+
+static size_t short_time_len(const char *line, size_t len)
+{
+	const char *time, *p;
+
+	if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':')
+		return 0;
+	p = time = line + len - strlen(" 07:01:32");
+
+	/* Permit 1-digit hours? */
+	if (*p++ != ' ' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a time. */
+		return 0;
+
+	return line + len - time;
+}
+
+static size_t fractional_time_len(const char *line, size_t len)
+{
+	const char *p;
+	size_t n;
+
+	/* Expected format: 19:41:17.620000023 */
+	if (!len || !isdigit(line[len - 1]))
+		return 0;
+	p = line + len - 1;
+
+	/* Fractional seconds. */
+	while (p > line && isdigit(*p))
+		p--;
+	if (*p != '.')
+		return 0;
+
+	/* Hours, minutes, and whole seconds. */
+	n = short_time_len(line, p - line);
+	if (!n)
+		return 0;
+
+	return line + len - p + n;
+}
+
+static size_t trailing_spaces_len(const char *line, size_t len)
+{
+	const char *p;
+
+	/* Expected format: ' ' x (1 or more)  */
+	if (!len || line[len - 1] != ' ')
+		return 0;
+
+	p = line + len;
+	while (p != line) {
+		p--;
+		if (*p != ' ')
+			return line + len - (p + 1);
+	}
+
+	/* All spaces! */
+	return len;
+}
+
+static size_t diff_timestamp_len(const char *line, size_t len)
+{
+	const char *end = line + len;
+	size_t n;
+
+	/*
+	 * Posix: 2010-07-05 19:41:17
+	 * GNU: 2010-07-05 19:41:17.620000023 -0500
+	 */
+
+	if (!isdigit(end[-1]))
+		return 0;
+
+	n = tz_len(line, end - line);
+	end -= n;
+
+	n = short_time_len(line, end - line);
+	if (!n)
+		n = fractional_time_len(line, end - line);
+	end -= n;
+
+	n = date_len(line, end - line);
+	if (!n)	/* No date.  Too bad. */
+		return 0;
+	end -= n;
+
+	if (end == line)	/* No space before date. */
+		return 0;
+	if (end[-1] == '\t') {	/* Success! */
+		end--;
+		return line + len - end;
+	}
+	if (end[-1] != ' ')	/* No space before date. */
+		return 0;
+
+	/* Whitespace damage. */
+	end -= trailing_spaces_len(line, end - line);
+	return line + len - end;
+}
+
+static char *find_name_common(const char *line, char *def, int p_value,
+				const char *end, int terminate)
 {
 	int len;
 	const char *start = NULL;
 
-	if (*line == '"') {
-		char *name = find_name_gnu(line, def, p_value);
-		if (name)
-			return name;
-	}
-
 	if (p_value == 0)
 		start = line;
-	for (;;) {
+	while (line != end) {
 		char c = *line;
 
-		if (isspace(c)) {
+		if (!end && isspace(c)) {
 			if (c == '\n')
 				break;
 			if (name_terminate(start, line-start, c, terminate))
@@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
 	return squash_slash(xmemdupz(start, len));
 }
 
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	return find_name_common(line, def, p_value, NULL, terminate);
+}
+
+static char *find_name_traditional(const char *line, char *def, int p_value)
+{
+	size_t len = strlen(line);
+	size_t date_len;
+
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	len = strchrnul(line, '\n') - line;
+	date_len = diff_timestamp_len(line, len);
+	if (!date_len)
+		return find_name_common(line, def, p_value, NULL, TERM_TAB);
+	len -= date_len;
+
+	return find_name_common(line, def, p_value, line + len, 0);
+}
+
 static int count_slashes(const char *cp)
 {
 	int cnt = 0;
@@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline)
 
 	if (is_dev_null(nameline))
 		return -1;
-	name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+	name = find_name_traditional(nameline, NULL, 0);
 	if (!name)
 		return -1;
 	cp = strchr(name, '/');
@@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc
 	if (is_dev_null(first)) {
 		patch->is_new = 1;
 		patch->is_delete = 0;
-		name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(second, NULL, p_value);
 		patch->new_name = name;
 	} else if (is_dev_null(second)) {
 		patch->is_new = 0;
 		patch->is_delete = 1;
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
 		patch->old_name = name;
 	} else {
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
-		name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
+		name = find_name_traditional(second, name, p_value);
 		if (has_epoch_timestamp(first)) {
 			patch->is_new = 1;
 			patch->is_delete = 0;
diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh
index 9373f64..1e5aad5 100755
--- a/t/t4135-apply-weird-filenames.sh
+++ b/t/t4135-apply-weird-filenames.sh
@@ -59,8 +59,8 @@ try_filename() {
 }
 
 try_filename 'plain'            'postimage.txt'
-try_filename 'with spaces'      'post image.txt' '' success failure failure
-try_filename 'with tab'         'post	image.txt' FUNNYNAMES success failure failure
+try_filename 'with spaces'      'post image.txt'
+try_filename 'with tab'         'post	image.txt' FUNNYNAMES
 try_filename 'with backslash'   'post\image.txt' BSLASHPSPEC
 try_filename 'with quote'       '"postimage".txt' FUNNYNAMES success failure success
 
-- 
1.7.2.1.544.ga752d.dirty

  parent reply	other threads:[~2010-08-19  1:52 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-11 23:35 What's cooking in git.git (Aug 2010, #02; Wed, 11) Junio C Hamano
2010-08-12  1:41 ` Jonathan Nieder
2010-08-12  2:33   ` Ævar Arnfjörð Bjarmason
2010-08-12  3:15     ` jn/commit-no-change-wo-status (Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Jonathan Nieder
2010-08-12  5:47 ` What's cooking in git.git (Aug 2010, #02; Wed, 11) Elijah Newren
2010-08-12 15:49   ` Junio C Hamano
2010-08-12 21:12     ` Elijah Newren
2010-08-12  9:23 ` Johannes Sixt
2010-08-12  9:37   ` Greg Brockman
2010-08-12 10:11     ` Ævar Arnfjörð Bjarmason
2010-08-12 22:08     ` Junio C Hamano
2010-08-12 22:13       ` Greg Brockman
2010-08-12 22:19       ` Junio C Hamano
2010-08-12 10:20   ` Ævar Arnfjörð Bjarmason
2010-08-12 11:35     ` Erik Faye-Lund
2010-08-12 16:50       ` Ævar Arnfjörð Bjarmason
2010-08-12 17:34         ` Chris Packham
2010-08-12 18:35           ` Ævar Arnfjörð Bjarmason
2010-08-12 22:19             ` windows smoke tester (was Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Chris Packham
2010-08-12 22:29               ` Ævar Arnfjörð Bjarmason
2010-08-12 22:58                 ` Chris Packham
2010-08-13  1:01                   ` Ævar Arnfjörð Bjarmason
2010-08-14  0:42                     ` Chris Packham
2010-08-14  0:46                       ` Ævar Arnfjörð Bjarmason
2010-08-15  0:54                       ` Tay Ray Chuan
2010-08-15  1:08                         ` Ævar Arnfjörð Bjarmason
2010-08-15 17:39                           ` Tay Ray Chuan
2010-08-12 10:21   ` What's cooking in git.git (Aug 2010, #02; Wed, 11) Ilari Liusvaara
2010-08-12 10:31     ` Johannes Sixt
2010-08-12 15:25       ` Ilari Liusvaara
2010-08-12 12:43   ` Elijah Newren
2010-08-12 22:21     ` Junio C Hamano
2010-08-12 21:58   ` Junio C Hamano
2010-08-12 22:40   ` jn/apply-filename-with-sp (Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Jonathan Nieder
2010-08-12 22:46     ` Ævar Arnfjörð Bjarmason
2010-08-12 23:17     ` Junio C Hamano
2010-08-13  0:59       ` Ævar Arnfjörð Bjarmason
2010-08-13 21:44     ` Johannes Sixt
2010-08-14  2:27       ` Jonathan Nieder
2010-08-14 18:37         ` Johannes Sixt
2010-08-15  0:05           ` Jonathan Nieder
2010-08-19  1:45           ` [PATCH v2 0/3] apply: handle traditional patches with space in filename Jonathan Nieder
2010-08-19  1:46             ` [PATCH 1/3] apply: split quoted filename handling into new function Jonathan Nieder
2010-08-19  1:48             ` [PATCH 2/3] tests: exercise "git apply" with weird filenames Jonathan Nieder
2010-08-19  1:50             ` Jonathan Nieder [this message]
2010-08-19 19:56             ` [PATCH v2 0/3] apply: handle traditional patches with space in filename Johannes Sixt
2010-08-20  6:26               ` Jonathan Nieder
2010-08-13  0:08   ` jn/svn-fe Jonathan Nieder
2010-08-13 10:18     ` jn/svn-fe Jakub Narebski
2010-08-13 21:33     ` jn/svn-fe Johannes Sixt
2010-08-13 23:47       ` [PATCH v2 jn/svn-fe 0/5] vcs-svn: Port to Windows Jonathan Nieder
2010-08-13 23:59         ` [PATCH 1/5] compat: add strtok_r() Jonathan Nieder
2010-08-14  0:01         ` [PATCH 2/5] vcs-svn: Rename dirent pool to build on Windows Jonathan Nieder
2010-08-14  0:03         ` [PATCH 3/5] vcs-svn: Avoid %z in format string Jonathan Nieder
2010-08-14  0:04         ` [PATCH 4/5] t9010 (svn-fe): use Unix-style path in URI Jonathan Nieder
2010-08-14  0:06         ` [PATCH 5/5] t9010 (svn-fe): avoid symlinks in test Jonathan Nieder
  -- strict thread matches above, loose matches on Subject: below --
2010-07-24  1:06 [PATCH 0/3] apply: handle traditional patches with spaces in filename Jonathan Nieder
2010-07-24  1:20 ` [PATCH 3/3] apply: Handle traditional patches with space " Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100819015014.GC18922@burratino \
    --to=jrnieder@gmail.com \
    --cc=gdb@mit.edu \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=ilari.liusvaara@elisanet.fi \
    --cc=j6t@kdbg.org \
    --cc=newren@gmail.com \
    --cc=schwab@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.