git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Philippe Blain via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Michael J Gruber <git@grubix.eu>,
	Matthieu Moy <git@matthieu-moy.fr>,
	John Keeping <john@keeping.me.uk>,
	Karthik Nayak <karthik.188@gmail.com>, Jeff King <peff@peff.net>,
	Alex Henrie <alexhenrie24@gmail.com>,
	Eric Sunshine <sunshine@sunshineco.com>,
	Philippe Blain <levraiphilippeblain@gmail.com>,
	Philippe Blain <levraiphilippeblain@gmail.com>
Subject: [PATCH v4 1/2] ref-filter: handle CRLF at end-of-line more gracefully
Date: Thu, 22 Oct 2020 03:01:18 +0000	[thread overview]
Message-ID: <03b2d7d78a15d15130a68ed1e6092072aa0807cd.1603335680.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.576.v4.git.1603335680.gitgitgadget@gmail.com>

From: Philippe Blain <levraiphilippeblain@gmail.com>

The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.

The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.

Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.

This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.

This bug is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).

Fix this bug in ref-filter by hardening the logic in `copy_subject` and
`find_subpos` to correctly parse messages containing CRLF.

Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
---
 ref-filter.c             |  36 ++++++++-----
 t/t3920-crlf-messages.sh | 108 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+), 14 deletions(-)
 create mode 100755 t/t3920-crlf-messages.sh

diff --git a/ref-filter.c b/ref-filter.c
index c62f6b4822..6476686fea 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1097,14 +1097,19 @@ static const char *copy_email(const char *buf, struct used_atom *atom)
 
 static char *copy_subject(const char *buf, unsigned long len)
 {
-	char *r = xmemdupz(buf, len);
+	struct strbuf sb = STRBUF_INIT;
 	int i;
 
-	for (i = 0; i < len; i++)
-		if (r[i] == '\n')
-			r[i] = ' ';
+	for (i = 0; i < len; i++) {
+		if (buf[i] == '\r' && i + 1 < len && buf[i + 1] == '\n')
+			continue; /* ignore CR in CRLF */
 
-	return r;
+		if (buf[i] == '\n')
+			strbuf_addch(&sb, ' ');
+		else
+			strbuf_addch(&sb, buf[i]);
+	}
+	return strbuf_detach(&sb, NULL);
 }
 
 static void grab_date(const char *buf, struct atom_value *v, const char *atomname)
@@ -1228,20 +1233,23 @@ static void find_subpos(const char *buf,
 
 	/* subject is first non-empty line */
 	*sub = buf;
-	/* subject goes to first empty line */
-	while (buf < *sig && *buf && *buf != '\n') {
-		eol = strchrnul(buf, '\n');
-		if (*eol)
-			eol++;
-		buf = eol;
-	}
+	/* subject goes to first empty line before signature begins */
+	if ((eol = strstr(*sub, "\n\n"))) {
+		eol = eol < *sig ? eol : *sig;
+	/* check if message uses CRLF */
+	} else if (! (eol = strstr(*sub, "\r\n\r\n"))) {
+		/* treat whole message as subject */
+		eol = strrchr(*sub, '\0');
+	}
+	buf = eol;
 	*sublen = buf - *sub;
 	/* drop trailing newline, if present */
-	if (*sublen && (*sub)[*sublen - 1] == '\n')
+	while (*sublen && ((*sub)[*sublen - 1] == '\n' ||
+			   (*sub)[*sublen - 1] == '\r'))
 		*sublen -= 1;
 
 	/* skip any empty lines */
-	while (*buf == '\n')
+	while (*buf == '\n' || *buf == '\r')
 		buf++;
 	*body = buf;
 	*bodylen = strlen(buf);
diff --git a/t/t3920-crlf-messages.sh b/t/t3920-crlf-messages.sh
new file mode 100755
index 0000000000..3f0ce02c3f
--- /dev/null
+++ b/t/t3920-crlf-messages.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+
+test_description='Test ref-filter and pretty APIs for commit and tag messages using CRLF'
+. ./test-lib.sh
+
+LIB_CRLF_BRANCHES=""
+
+create_crlf_ref () {
+	branch="$1" &&
+	cat >.crlf-orig-$branch.txt &&
+	cat .crlf-orig-$branch.txt | append_cr >.crlf-message-$branch.txt &&
+	grep 'Subject' .crlf-orig-$branch.txt | tr '\n' ' ' | sed 's/[ ]*$//' | tr -d '\n' >.crlf-subject-$branch.txt &&
+	grep 'Body' .crlf-message-$branch.txt >.crlf-body-$branch.txt || true &&
+	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}" &&
+	test_tick &&
+	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
+	git branch ${branch} ${hash} &&
+	git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
+}
+
+create_crlf_refs () {
+	create_crlf_ref crlf <<-\EOF &&
+	Subject first line
+
+	Body first line
+	Body second line
+	EOF
+	create_crlf_ref crlf-empty-lines-after-subject <<-\EOF &&
+	Subject first line
+
+
+	Body first line
+	Body second line
+	EOF
+	create_crlf_ref crlf-two-line-subject <<-\EOF &&
+	Subject first line
+	Subject second line
+
+	Body first line
+	Body second line
+	EOF
+	create_crlf_ref crlf-two-line-subject-no-body <<-\EOF &&
+	Subject first line
+	Subject second line
+	EOF
+	create_crlf_ref crlf-two-line-subject-no-body-trailing-newline <<-\EOF
+	Subject first line
+	Subject second line
+
+	EOF
+}
+
+test_crlf_subject_body_and_contents() {
+	command_and_args="$@" &&
+	command=$1 &&
+	if test ${command} = "branch" || test ${command} = "for-each-ref" || test ${command} = "tag"
+	then
+		atoms="(contents:subject) (contents:body) (contents)"
+	elif test ${command} = "log" || test ${command} = "show"
+	then
+		atoms="s b B"
+	fi &&
+	files="subject body message" &&
+	while test -n "${atoms}"
+	do
+		set ${atoms} && atom=$1 && shift && atoms="$*" &&
+		set ${files} &&	file=$1 && shift && files="$*" &&
+		test_expect_success "${command}: --format='%${atom}' works with messages using CRLF" "
+			rm -f expect &&
+			for ref in ${LIB_CRLF_BRANCHES}
+			do
+				cat .crlf-${file}-\"\${ref}\".txt >>expect &&
+				printf \"\n\" >>expect
+			done &&
+			git $command_and_args --format=\"%${atom}\" >actual &&
+			test_cmp expect actual
+		"
+	done
+}
+
+
+test_expect_success 'Setup refs with commit and tag messages using CRLF' '
+	test_commit inital &&
+	create_crlf_refs
+'
+
+test_expect_success 'branch: --verbose works with messages using CRLF' '
+	rm -f expect &&
+	for branch in $LIB_CRLF_BRANCHES
+	do
+		printf "  " >>expect &&
+		cat .crlf-subject-${branch}.txt >>expect &&
+		printf "\n" >>expect
+	done &&
+	git branch -v >tmp &&
+	# Remove first two columns, and the line for the currently checked out branch
+	current=$(git branch --show-current) &&
+	grep -v $current <tmp | awk "{\$1=\$2=\"\"}1"  >actual &&
+	test_cmp expect actual
+'
+
+test_crlf_subject_body_and_contents branch --list crlf*
+
+test_crlf_subject_body_and_contents tag --list tag-crlf*
+
+test_crlf_subject_body_and_contents for-each-ref refs/heads/crlf*
+
+test_done
-- 
gitgitgadget


  reply	other threads:[~2020-10-22  3:01 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-08 18:29 [PATCH 0/3] Teach ref-filter API to correctly handle CRLF in messages Philippe Blain via GitGitGadget
2020-03-08 18:29 ` [PATCH 1/3] t: add lib-crlf-messages.sh for messages containing CRLF Philippe Blain via GitGitGadget
2020-03-08 18:29 ` [PATCH 2/3] ref-filter: teach the API to correctly handle CRLF Philippe Blain via GitGitGadget
2020-03-08 18:29 ` [PATCH 3/3] log: add tests for messages containing CRLF to t4202 Philippe Blain via GitGitGadget
2020-03-09 15:14 ` [PATCH 0/3] Teach ref-filter API to correctly handle CRLF in messages Junio C Hamano
2020-03-10  2:19   ` Philippe Blain
2020-03-10  2:24 ` [PATCH v2 " Philippe Blain via GitGitGadget
2020-03-10  2:24   ` [PATCH v2 1/3] t: add lib-crlf-messages.sh for messages containing CRLF Philippe Blain via GitGitGadget
2020-03-10  2:24   ` [PATCH v2 2/3] ref-filter: fix the API to correctly handle CRLF Philippe Blain via GitGitGadget
2020-03-10 17:50     ` Junio C Hamano
2020-03-10  2:24   ` [PATCH v2 3/3] log: add tests for messages containing CRLF to t4202 Philippe Blain via GitGitGadget
2020-03-10  3:31   ` [PATCH v2 0/3] Teach ref-filter API to correctly handle CRLF in messages Junio C Hamano
2020-03-10 17:24   ` Junio C Hamano
2020-10-12 18:09   ` [PATCH v3 0/3] ref-filter: handle CRLF at end-of-line more gracefully Philippe Blain via GitGitGadget
2020-10-12 18:09     ` [PATCH v3 1/3] t: add lib-crlf-messages.sh for messages containing CRLF Philippe Blain via GitGitGadget
2020-10-12 22:22       ` Junio C Hamano
2020-10-14 13:22         ` Philippe Blain
2020-10-12 22:47       ` Eric Sunshine
2020-10-14 13:20         ` Philippe Blain
2020-10-14 13:45           ` Eric Sunshine
2020-10-14 13:52             ` Philippe Blain
2020-10-14 23:01               ` Eric Sunshine
2020-10-22  3:09         ` Philippe Blain
2020-10-12 18:09     ` [PATCH v3 2/3] ref-filter: handle CRLF at end-of-line more gracefully Philippe Blain via GitGitGadget
2020-10-12 22:24       ` Junio C Hamano
2020-10-14 13:09         ` Philippe Blain
2020-10-12 18:09     ` [PATCH v3 3/3] log, show: add tests for messages containing CRLF Philippe Blain via GitGitGadget
2020-10-22  3:01     ` [PATCH v4 0/2] ref-filter: handle CRLF at end-of-line more gracefully Philippe Blain via GitGitGadget
2020-10-22  3:01       ` Philippe Blain via GitGitGadget [this message]
2020-10-23  0:52         ` [PATCH v4 1/2] " Junio C Hamano
2020-10-23  1:46           ` Philippe Blain
2020-10-28 20:24             ` Junio C Hamano
2020-10-29  1:29               ` Philippe Blain
2020-10-22  3:01       ` [PATCH v4 2/2] log, show: add tests for messages containing CRLF Philippe Blain via GitGitGadget
2020-10-22 19:24         ` Philippe Blain
2020-10-29 12:48       ` [PATCH v5 0/2] ref-filter: handle CRLF at end-of-line more gracefully Philippe Blain via GitGitGadget
2020-10-29 12:48         ` [PATCH v5 1/2] " Philippe Blain via GitGitGadget
2020-10-29 12:48         ` [PATCH v5 2/2] log, show: add tests for messages containing CRLF Philippe Blain via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=03b2d7d78a15d15130a68ed1e6092072aa0807cd.1603335680.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=alexhenrie24@gmail.com \
    --cc=git@grubix.eu \
    --cc=git@matthieu-moy.fr \
    --cc=git@vger.kernel.org \
    --cc=john@keeping.me.uk \
    --cc=karthik.188@gmail.com \
    --cc=levraiphilippeblain@gmail.com \
    --cc=peff@peff.net \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).