git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: Junio C Hamano <gitster@pobox.com>
Cc: Johannes Sixt <j6t@kdbg.org>,
	git@vger.kernel.org, Greg Brockman <gdb@mit.edu>,
	Ilari Liusvaara <ilari.liusvaara@elisanet.fi>,
	Elijah Newren <newren@gmail.com>,
	Andreas Schwab <schwab@linux-m68k.org>
Subject: [PATCH 3/3] apply: handle traditional patches with space in filename
Date: Wed, 18 Aug 2010 20:50:14 -0500	[thread overview]
Message-ID: <20100819015014.GC18922@burratino> (raw)
In-Reply-To: <20100819014516.GA7175@burratino>

To discover filenames from the --- and +++ lines in a traditional
unified diff, currently "git apply" scans forward for a whitespace
character on each line and stops there.  It can't use the whole line
because "diff -u" likes to include timestamps, like so:

 --- foo	2000-07-12 16:56:50.020000414 -0500
 +++ bar	2010-07-12 16:56:50.020000414 -0500

The whitespace-seeking heuristic works great, even when the tab
has been converted to spaces by some email + copy-and-paste
related corruption.

Except for one problem: if the filename itself contains whitespace,
the inferred filename will be too short.

When Giuseppe ran into this problem, it was for a file creation
patch (for debian/licenses/LICENSE.global BSD-style Chromium).
So one can't use the list of files present in the index to deduce an
appropriate filename (not to mention that way lies madness; see
v0.99~402, 2005-05-31).

Instead, look for a timestamp and use that if present to mark the end
of the filename.  If no timestamp is present, the old heuristic is
used, with one exception: the space character \040 is not considered
terminating whitespace any more unless it is followed by a timestamp.

Reported-by: Giuseppe Iuculano <iuculano@debian.org>
Acked-by: Guido Günther <agx@sigxcpu.org>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Thanks for the helpful advice and patience at my use of it.

 builtin/apply.c                  |  193 +++++++++++++++++++++++++++++++++++---
 t/t4135-apply-weird-filenames.sh |    4 +-
 2 files changed, 181 insertions(+), 16 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index efc109e..bd2fcb3 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value)
 	return squash_slash(strbuf_detach(&name, NULL));
 }
 
-static char *find_name(const char *line, char *def, int p_value, int terminate)
+static size_t tz_len(const char *line, size_t len)
+{
+	const char *tz, *p;
+
+	if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ')
+		return 0;
+	tz = line + len - strlen(" +0500");
+
+	if (tz[1] != '+' && tz[1] != '-')
+		return 0;
+
+	for (p = tz + 2; p != line + len; p++)
+		if (!isdigit(*p))
+			return 0;
+
+	return line + len - tz;
+}
+
+static size_t date_len(const char *line, size_t len)
+{
+	const char *date, *p;
+
+	if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-')
+		return 0;
+	p = date = line + len - strlen("72-02-05");
+
+	if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
+		return 0;
+
+	if (date - line >= strlen("19") &&
+	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
+		date -= strlen("19");
+
+	return line + len - date;
+}
+
+static size_t short_time_len(const char *line, size_t len)
+{
+	const char *time, *p;
+
+	if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':')
+		return 0;
+	p = time = line + len - strlen(" 07:01:32");
+
+	/* Permit 1-digit hours? */
+	if (*p++ != ' ' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a time. */
+		return 0;
+
+	return line + len - time;
+}
+
+static size_t fractional_time_len(const char *line, size_t len)
+{
+	const char *p;
+	size_t n;
+
+	/* Expected format: 19:41:17.620000023 */
+	if (!len || !isdigit(line[len - 1]))
+		return 0;
+	p = line + len - 1;
+
+	/* Fractional seconds. */
+	while (p > line && isdigit(*p))
+		p--;
+	if (*p != '.')
+		return 0;
+
+	/* Hours, minutes, and whole seconds. */
+	n = short_time_len(line, p - line);
+	if (!n)
+		return 0;
+
+	return line + len - p + n;
+}
+
+static size_t trailing_spaces_len(const char *line, size_t len)
+{
+	const char *p;
+
+	/* Expected format: ' ' x (1 or more)  */
+	if (!len || line[len - 1] != ' ')
+		return 0;
+
+	p = line + len;
+	while (p != line) {
+		p--;
+		if (*p != ' ')
+			return line + len - (p + 1);
+	}
+
+	/* All spaces! */
+	return len;
+}
+
+static size_t diff_timestamp_len(const char *line, size_t len)
+{
+	const char *end = line + len;
+	size_t n;
+
+	/*
+	 * Posix: 2010-07-05 19:41:17
+	 * GNU: 2010-07-05 19:41:17.620000023 -0500
+	 */
+
+	if (!isdigit(end[-1]))
+		return 0;
+
+	n = tz_len(line, end - line);
+	end -= n;
+
+	n = short_time_len(line, end - line);
+	if (!n)
+		n = fractional_time_len(line, end - line);
+	end -= n;
+
+	n = date_len(line, end - line);
+	if (!n)	/* No date.  Too bad. */
+		return 0;
+	end -= n;
+
+	if (end == line)	/* No space before date. */
+		return 0;
+	if (end[-1] == '\t') {	/* Success! */
+		end--;
+		return line + len - end;
+	}
+	if (end[-1] != ' ')	/* No space before date. */
+		return 0;
+
+	/* Whitespace damage. */
+	end -= trailing_spaces_len(line, end - line);
+	return line + len - end;
+}
+
+static char *find_name_common(const char *line, char *def, int p_value,
+				const char *end, int terminate)
 {
 	int len;
 	const char *start = NULL;
 
-	if (*line == '"') {
-		char *name = find_name_gnu(line, def, p_value);
-		if (name)
-			return name;
-	}
-
 	if (p_value == 0)
 		start = line;
-	for (;;) {
+	while (line != end) {
 		char c = *line;
 
-		if (isspace(c)) {
+		if (!end && isspace(c)) {
 			if (c == '\n')
 				break;
 			if (name_terminate(start, line-start, c, terminate))
@@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
 	return squash_slash(xmemdupz(start, len));
 }
 
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	return find_name_common(line, def, p_value, NULL, terminate);
+}
+
+static char *find_name_traditional(const char *line, char *def, int p_value)
+{
+	size_t len = strlen(line);
+	size_t date_len;
+
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	len = strchrnul(line, '\n') - line;
+	date_len = diff_timestamp_len(line, len);
+	if (!date_len)
+		return find_name_common(line, def, p_value, NULL, TERM_TAB);
+	len -= date_len;
+
+	return find_name_common(line, def, p_value, line + len, 0);
+}
+
 static int count_slashes(const char *cp)
 {
 	int cnt = 0;
@@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline)
 
 	if (is_dev_null(nameline))
 		return -1;
-	name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+	name = find_name_traditional(nameline, NULL, 0);
 	if (!name)
 		return -1;
 	cp = strchr(name, '/');
@@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc
 	if (is_dev_null(first)) {
 		patch->is_new = 1;
 		patch->is_delete = 0;
-		name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(second, NULL, p_value);
 		patch->new_name = name;
 	} else if (is_dev_null(second)) {
 		patch->is_new = 0;
 		patch->is_delete = 1;
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
 		patch->old_name = name;
 	} else {
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
-		name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
+		name = find_name_traditional(second, name, p_value);
 		if (has_epoch_timestamp(first)) {
 			patch->is_new = 1;
 			patch->is_delete = 0;
diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh
index 9373f64..1e5aad5 100755
--- a/t/t4135-apply-weird-filenames.sh
+++ b/t/t4135-apply-weird-filenames.sh
@@ -59,8 +59,8 @@ try_filename() {
 }
 
 try_filename 'plain'            'postimage.txt'
-try_filename 'with spaces'      'post image.txt' '' success failure failure
-try_filename 'with tab'         'post	image.txt' FUNNYNAMES success failure failure
+try_filename 'with spaces'      'post image.txt'
+try_filename 'with tab'         'post	image.txt' FUNNYNAMES
 try_filename 'with backslash'   'post\image.txt' BSLASHPSPEC
 try_filename 'with quote'       '"postimage".txt' FUNNYNAMES success failure success
 
-- 
1.7.2.1.544.ga752d.dirty

  parent reply	other threads:[~2010-08-19  1:52 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-11 23:35 What's cooking in git.git (Aug 2010, #02; Wed, 11) Junio C Hamano
2010-08-12  1:41 ` Jonathan Nieder
2010-08-12  2:33   ` Ævar Arnfjörð Bjarmason
2010-08-12  3:15     ` jn/commit-no-change-wo-status (Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Jonathan Nieder
2010-08-12  5:47 ` What's cooking in git.git (Aug 2010, #02; Wed, 11) Elijah Newren
2010-08-12 15:49   ` Junio C Hamano
2010-08-12 21:12     ` Elijah Newren
2010-08-12  9:23 ` Johannes Sixt
2010-08-12  9:37   ` Greg Brockman
2010-08-12 10:11     ` Ævar Arnfjörð Bjarmason
2010-08-12 22:08     ` Junio C Hamano
2010-08-12 22:13       ` Greg Brockman
2010-08-12 22:19       ` Junio C Hamano
2010-08-12 10:20   ` Ævar Arnfjörð Bjarmason
2010-08-12 11:35     ` Erik Faye-Lund
2010-08-12 16:50       ` Ævar Arnfjörð Bjarmason
2010-08-12 17:34         ` Chris Packham
2010-08-12 18:35           ` Ævar Arnfjörð Bjarmason
2010-08-12 22:19             ` windows smoke tester (was Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Chris Packham
2010-08-12 22:29               ` Ævar Arnfjörð Bjarmason
2010-08-12 22:58                 ` Chris Packham
2010-08-13  1:01                   ` Ævar Arnfjörð Bjarmason
2010-08-14  0:42                     ` Chris Packham
2010-08-14  0:46                       ` Ævar Arnfjörð Bjarmason
2010-08-15  0:54                       ` Tay Ray Chuan
2010-08-15  1:08                         ` Ævar Arnfjörð Bjarmason
2010-08-15 17:39                           ` Tay Ray Chuan
2010-08-12 10:21   ` What's cooking in git.git (Aug 2010, #02; Wed, 11) Ilari Liusvaara
2010-08-12 10:31     ` Johannes Sixt
2010-08-12 15:25       ` Ilari Liusvaara
2010-08-12 12:43   ` Elijah Newren
2010-08-12 22:21     ` Junio C Hamano
2010-08-12 21:58   ` Junio C Hamano
2010-08-12 22:40   ` jn/apply-filename-with-sp (Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Jonathan Nieder
2010-08-12 22:46     ` Ævar Arnfjörð Bjarmason
2010-08-12 23:17     ` Junio C Hamano
2010-08-13  0:59       ` Ævar Arnfjörð Bjarmason
2010-08-13 21:44     ` Johannes Sixt
2010-08-14  2:27       ` Jonathan Nieder
2010-08-14 18:37         ` Johannes Sixt
2010-08-15  0:05           ` Jonathan Nieder
2010-08-19  1:45           ` [PATCH v2 0/3] apply: handle traditional patches with space in filename Jonathan Nieder
2010-08-19  1:46             ` [PATCH 1/3] apply: split quoted filename handling into new function Jonathan Nieder
2010-08-19  1:48             ` [PATCH 2/3] tests: exercise "git apply" with weird filenames Jonathan Nieder
2010-08-19  1:50             ` Jonathan Nieder [this message]
2010-08-19 19:56             ` [PATCH v2 0/3] apply: handle traditional patches with space in filename Johannes Sixt
2010-08-20  6:26               ` Jonathan Nieder
2010-08-13  0:08   ` jn/svn-fe Jonathan Nieder
2010-08-13 10:18     ` jn/svn-fe Jakub Narebski
2010-08-13 21:33     ` jn/svn-fe Johannes Sixt
2010-08-13 23:47       ` [PATCH v2 jn/svn-fe 0/5] vcs-svn: Port to Windows Jonathan Nieder
2010-08-13 23:59         ` [PATCH 1/5] compat: add strtok_r() Jonathan Nieder
2010-08-14  0:01         ` [PATCH 2/5] vcs-svn: Rename dirent pool to build on Windows Jonathan Nieder
2010-08-14  0:03         ` [PATCH 3/5] vcs-svn: Avoid %z in format string Jonathan Nieder
2010-08-14  0:04         ` [PATCH 4/5] t9010 (svn-fe): use Unix-style path in URI Jonathan Nieder
2010-08-14  0:06         ` [PATCH 5/5] t9010 (svn-fe): avoid symlinks in test Jonathan Nieder
  -- strict thread matches above, loose matches on Subject: below --
2010-07-24  1:06 [PATCH 0/3] apply: handle traditional patches with spaces in filename Jonathan Nieder
2010-07-24  1:20 ` [PATCH 3/3] apply: Handle traditional patches with space " Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100819015014.GC18922@burratino \
    --to=jrnieder@gmail.com \
    --cc=gdb@mit.edu \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=ilari.liusvaara@elisanet.fi \
    --cc=j6t@kdbg.org \
    --cc=newren@gmail.com \
    --cc=schwab@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).