* [PATCH 1/3] apply: Split quoted filename handling into new function
2010-07-24 1:06 [PATCH 0/3] apply: handle traditional patches with spaces in filename Jonathan Nieder
@ 2010-07-24 1:09 ` Jonathan Nieder
2010-07-24 1:11 ` [PATCH 2/3] tests: Test how well "git apply" copes with weird filenames Jonathan Nieder
2010-07-24 1:20 ` [PATCH 3/3] apply: Handle traditional patches with space in filename Jonathan Nieder
2 siblings, 0 replies; 6+ messages in thread
From: Jonathan Nieder @ 2010-07-24 1:09 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Guido Günther, Giuseppe Iuculano
The new find_name_gnu() function handles new-style ‘--- "a/foo"’
patch header lines, leaving find_name() itself a bit less
daunting.
Functional change: do not clobber the p-value when there are not
enough path components in a quoted file name to honor it.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
builtin/apply.c | 70 +++++++++++++++++++++++++++---------------------
t/t4120-apply-popt.sh | 35 ++++++++++++++++++++++--
2 files changed, 71 insertions(+), 34 deletions(-)
diff --git a/builtin/apply.c b/builtin/apply.c
index 12ef9ea..efc109e 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -416,44 +416,52 @@ static char *squash_slash(char *name)
return name;
}
+static char *find_name_gnu(const char *line, char *def, int p_value)
+{
+ struct strbuf name = STRBUF_INIT;
+ char *cp;
+
+ /*
+ * Proposed "new-style" GNU patch/diff format; see
+ * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
+ */
+ if (unquote_c_style(&name, line, NULL)) {
+ strbuf_release(&name);
+ return NULL;
+ }
+
+ for (cp = name.buf; p_value; p_value--) {
+ cp = strchr(cp, '/');
+ if (!cp) {
+ strbuf_release(&name);
+ return NULL;
+ }
+ cp++;
+ }
+
+ /* name can later be freed, so we need
+ * to memmove, not just return cp
+ */
+ strbuf_remove(&name, 0, cp - name.buf);
+ free(def);
+ if (root)
+ strbuf_insert(&name, 0, root, root_len);
+ return squash_slash(strbuf_detach(&name, NULL));
+}
+
static char *find_name(const char *line, char *def, int p_value, int terminate)
{
int len;
const char *start = NULL;
+ if (*line == '"') {
+ char *name = find_name_gnu(line, def, p_value);
+ if (name)
+ return name;
+ }
+
if (p_value == 0)
start = line;
-
- if (*line == '"') {
- struct strbuf name = STRBUF_INIT;
-
- /*
- * Proposed "new-style" GNU patch/diff format; see
- * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
- */
- if (!unquote_c_style(&name, line, NULL)) {
- char *cp;
-
- for (cp = name.buf; p_value; p_value--) {
- cp = strchr(cp, '/');
- if (!cp)
- break;
- cp++;
- }
- if (cp) {
- /* name can later be freed, so we need
- * to memmove, not just return cp
- */
- strbuf_remove(&name, 0, cp - name.buf);
- free(def);
- if (root)
- strbuf_insert(&name, 0, root, root_len);
- return squash_slash(strbuf_detach(&name, NULL));
- }
- }
- strbuf_release(&name);
- }
-
for (;;) {
char c = *line;
diff --git a/t/t4120-apply-popt.sh b/t/t4120-apply-popt.sh
index b463b4f..2b2d00b 100755
--- a/t/t4120-apply-popt.sh
+++ b/t/t4120-apply-popt.sh
@@ -10,21 +10,50 @@ test_description='git apply -p handling.'
test_expect_success setup '
mkdir sub &&
echo A >sub/file1 &&
- cp sub/file1 file1 &&
+ cp sub/file1 file1.saved &&
git add sub/file1 &&
echo B >sub/file1 &&
git diff >patch.file &&
- rm sub/file1 &&
- rmdir sub
+ git checkout -- sub/file1 &&
+ git mv sub süb &&
+ echo B >süb/file1 &&
+ git diff >patch.escaped &&
+ grep "[\]" patch.escaped &&
+ rm süb/file1 &&
+ rmdir süb
'
test_expect_success 'apply git diff with -p2' '
+ cp file1.saved file1 &&
git apply -p2 patch.file
'
test_expect_success 'apply with too large -p' '
+ cp file1.saved file1 &&
test_must_fail git apply --stat -p3 patch.file 2>err &&
grep "removing 3 leading" err
'
+test_expect_success 'apply (-p2) traditional diff with funny filenames' '
+ cat >patch.quotes <<-\EOF &&
+ diff -u "a/"sub/file1 "b/"sub/file1
+ --- "a/"sub/file1
+ +++ "b/"sub/file1
+ @@ -1 +1 @@
+ -A
+ +B
+ EOF
+ echo B >expected &&
+
+ cp file1.saved file1 &&
+ git apply -p2 patch.quotes &&
+ test_cmp expected file1
+'
+
+test_expect_success 'apply with too large -p and fancy filename' '
+ cp file1.saved file1 &&
+ test_must_fail git apply --stat -p3 patch.escaped 2>err &&
+ grep "removing 3 leading" err
+'
+
test_done
--
1.7.2.rc3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/3] apply: Handle traditional patches with space in filename
2010-07-24 1:06 [PATCH 0/3] apply: handle traditional patches with spaces in filename Jonathan Nieder
2010-07-24 1:09 ` [PATCH 1/3] apply: Split quoted filename handling into new function Jonathan Nieder
2010-07-24 1:11 ` [PATCH 2/3] tests: Test how well "git apply" copes with weird filenames Jonathan Nieder
@ 2010-07-24 1:20 ` Jonathan Nieder
2 siblings, 0 replies; 6+ messages in thread
From: Jonathan Nieder @ 2010-07-24 1:20 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Guido Günther, Giuseppe Iuculano
To discover filenames from the --- and +++ lines in a traditional
unified diff, currently ‘git apply’ scans forward for a whitespace
character on each line and stops there. It can’t use the whole line
because ‘diff -u’ likes to include timestamps, like so:
--- foo 2000-07-12 16:56:50.020000414 -0500
+++ bar 2010-07-12 16:56:50.020000414 -0500
The whitespace-seeking heuristic works great, even when the tab
has been converted to spaces by some email + copy-and-paste
related corruption.
Except for one problem: if the filename itself contains whitespace,
the inferred filename will be too short.
When Giuseppe ran into this problem, it was for a file creation
patch (filename ‘debian/licenses/LICENSE.global BSD-style Chromium’).
So one can’t use the list of files present in the index to deduce an
appropriate filename (not to mention that way lies madness; see
v0.99~402, 2005-05-31).
Instead, look for a timestamp and use that if present to mark the end
of the filename. If no timestamp is present, the old heuristic is
used, with one exception: the space character \040 is not considered
terminating whitespace any more unless it is followed by a timestamp.
Reported-by: Giuseppe Iuculano <iuculano@debian.org>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Guido Günther <agx@sigxcpu.org>
---
Guido, I have carried over your ack from <http://bugs.debian.org/578752>.
I hope that is okay. And to both Guido and Giuseppe, sorry to have
taken so long on this.
That is the end of the series. I hope it was not too unpleasant a read.
As always, thoughts, improvements, bugs welcome.
Regards,
Jonathan
builtin/apply.c | 193 +++++++++++++++++++++++++++++++++++---
t/t4135-apply-weird-filenames.sh | 4 +-
2 files changed, 181 insertions(+), 16 deletions(-)
diff --git a/builtin/apply.c b/builtin/apply.c
index efc109e..b975c99 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value)
return squash_slash(strbuf_detach(&name, NULL));
}
-static char *find_name(const char *line, char *def, int p_value, int terminate)
+static size_t tz_len(const char *line, size_t len)
+{
+ const char *tz, *p;
+
+ if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ')
+ return 0;
+ tz = line + len - strlen(" +0500");
+
+ if (tz[1] != '+' && tz[1] != '-')
+ return 0;
+
+ for (p = tz + 2; p != line + len; p++)
+ if (!isdigit(*p))
+ return 0;
+
+ return line + len - tz;
+}
+
+static size_t date_len(const char *line, size_t len)
+{
+ const char *date, *p;
+
+ if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-')
+ return 0;
+ p = date = line + len - strlen("72-02-05");
+
+ if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+ !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+ !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */
+ return 0;
+
+ if (date - line >= strlen("19") &&
+ isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */
+ date -= strlen("19");
+
+ return line + len - date;
+}
+
+static size_t short_time_len(const char *line, size_t len)
+{
+ const char *time, *p;
+
+ if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':')
+ return 0;
+ p = time = line + len - strlen(" 07:01:32");
+
+ /* Permit 1-digit hours? */
+ if (*p++ != ' ' ||
+ !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+ !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+ !isdigit(*p++) || !isdigit(*p++)) /* Not a time. */
+ return 0;
+
+ return line + len - time;
+}
+
+static size_t fractional_time_len(const char *line, size_t len)
+{
+ const char *p;
+ size_t n;
+
+ /* Expected format: 19:41:17.620000023 */
+ if (!len || !isdigit(line[len - 1]))
+ return 0;
+ p = line + len - 1;
+
+ /* Fractional seconds. */
+ while (p > line && isdigit(*p))
+ p--;
+ if (*p != '.')
+ return 0;
+
+ /* Hours, minutes, and whole seconds. */
+ n = short_time_len(line, p - line);
+ if (!n)
+ return 0;
+
+ return line + len - p + n;
+}
+
+static size_t trailing_spaces_len(const char *line, size_t len)
+{
+ const char *p;
+
+ /* Expected format: ' ' x (1 or more) */
+ if (!len || line[len - 1] != ' ')
+ return 0;
+
+ p = line + len;
+ while (p != line) {
+ p--;
+ if (*p != ' ')
+ return line + len - (p + 1);
+ }
+
+ /* All spaces! */
+ return len;
+}
+
+static size_t diff_timestamp_len(const char *line, size_t len)
+{
+ const char *end = line + len;
+ size_t n;
+
+ /*
+ * Posix: 2010-07-05 19:41:17
+ * GNU: 2010-07-05 19:41:17.620000023 -0500
+ */
+
+ if (!isdigit(end[-1]))
+ return 0;
+
+ n = tz_len(line, end - line);
+ end -= n;
+
+ n = short_time_len(line, end - line);
+ if (!n)
+ n = fractional_time_len(line, end - line);
+ end -= n;
+
+ n = date_len(line, end - line);
+ if (!n) /* No date. Too bad. */
+ return 0;
+ end -= n;
+
+ if (end == line) /* No space before date. */
+ return 0;
+ if (end[-1] == '\t') { /* Success! */
+ end--;
+ return line + len - end;
+ }
+ if (end[-1] != ' ') /* No space before date. */
+ return 0;
+
+ /* Whitespace damage. */
+ end -= trailing_spaces_len(line, end - line);
+ return line + len - end;
+}
+
+static char *find_name_common(const char *line, char *def, int p_value,
+ const char *end, int terminate)
{
int len;
const char *start = NULL;
- if (*line == '"') {
- char *name = find_name_gnu(line, def, p_value);
- if (name)
- return name;
- }
-
if (p_value == 0)
start = line;
- for (;;) {
+ while (line != end) {
char c = *line;
- if (isspace(c)) {
+ if (!end && isspace(c)) {
if (c == '\n')
break;
if (name_terminate(start, line-start, c, terminate))
@@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
return squash_slash(xmemdupz(start, len));
}
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+ if (*line == '"') {
+ char *name = find_name_gnu(line, def, p_value);
+ if (name)
+ return name;
+ }
+
+ return find_name_common(line, def, p_value, NULL, terminate);
+}
+
+static char *find_name_traditional(const char *line, char *def, int p_value)
+{
+ size_t len = strlen(line);
+ size_t date_len;
+
+ if (*line == '"') {
+ char *name = find_name_gnu(line, def, p_value);
+ if (name)
+ return name;
+ }
+
+ len = strchrnul(line, '\n') - line;
+ date_len = diff_timestamp_len(line, len);
+ if (!date_len)
+ return find_name(line, def, p_value, TERM_TAB);
+ len -= date_len;
+
+ return find_name_common(line, def, p_value, line + len, 0);
+}
+
static int count_slashes(const char *cp)
{
int cnt = 0;
@@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline)
if (is_dev_null(nameline))
return -1;
- name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+ name = find_name_traditional(nameline, NULL, 0);
if (!name)
return -1;
cp = strchr(name, '/');
@@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc
if (is_dev_null(first)) {
patch->is_new = 1;
patch->is_delete = 0;
- name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+ name = find_name_traditional(second, NULL, p_value);
patch->new_name = name;
} else if (is_dev_null(second)) {
patch->is_new = 0;
patch->is_delete = 1;
- name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+ name = find_name_traditional(first, NULL, p_value);
patch->old_name = name;
} else {
- name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
- name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+ name = find_name_traditional(first, NULL, p_value);
+ name = find_name_traditional(second, name, p_value);
if (has_epoch_timestamp(first)) {
patch->is_new = 1;
patch->is_delete = 0;
diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh
index 2dcb040..52f9f5b 100755
--- a/t/t4135-apply-weird-filenames.sh
+++ b/t/t4135-apply-weird-filenames.sh
@@ -94,8 +94,8 @@ try_filename() {
}
try_filename 'plain' 'postimage.txt'
-try_filename 'with spaces' 'post image.txt' success failure failure
-try_filename 'with tab' 'post image.txt' success failure failure
+try_filename 'with spaces' 'post image.txt'
+try_filename 'with tab' 'post image.txt'
try_filename 'with backslash' 'post\image.txt'
try_filename 'with quote' '"postimage".txt' success failure success
--
1.7.2.rc3
^ permalink raw reply related [flat|nested] 6+ messages in thread