git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Kevin Daudt <me@ikke.info>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>,
	Swift Geek <swiftgeek@gmail.com>, Jeff King <peff@peff.net>,
	Kevin Daudt <me@ikke.info>
Subject: [PATCH v3 2/2] mailinfo: unescape quoted-pair in header fields
Date: Sun, 25 Sep 2016 23:08:08 +0200	[thread overview]
Message-ID: <20160925210808.26424-2-me@ikke.info> (raw)
In-Reply-To: <20160925210808.26424-1-me@ikke.info>

rfc2822 has provisions for quoted strings and comments in structured header
fields, but also allows for escaping these with so-called quoted-pairs.

The only thing git currently does is removing exterior quotes, but
quotes within are left alone.

Remove exterior quotes and remove escape characters so that they don't
show up in the author field.

Signed-off-by: Kevin Daudt <me@ikke.info>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Changes since v2:

 - handle comments inside comments recursively
 - renamed the main function to unquote_quoted_pairs because it also
   handles quoted pairs in comments


 mailinfo.c                   | 82 ++++++++++++++++++++++++++++++++++++++++++++
 t/t5100-mailinfo.sh          | 14 ++++++++
 t/t5100/comment.expect       |  5 +++
 t/t5100/comment.in           |  9 +++++
 t/t5100/quoted-string.expect |  5 +++
 t/t5100/quoted-string.in     |  9 +++++
 6 files changed, 124 insertions(+)
 create mode 100644 t/t5100/comment.expect
 create mode 100644 t/t5100/comment.in
 create mode 100644 t/t5100/quoted-string.expect
 create mode 100644 t/t5100/quoted-string.in

diff --git a/mailinfo.c b/mailinfo.c
index e19abe3..b4118a0 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -54,6 +54,86 @@ static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
 	get_sane_name(&mi->name, &mi->name, &mi->email);
 }
 
+static const char *unquote_comment(struct strbuf *outbuf, const char *in)
+{
+	int c;
+	int take_next_litterally = 0;
+
+	strbuf_addch(outbuf, '(');
+
+	while ((c = *in++) != 0) {
+		if (take_next_litterally == 1) {
+			take_next_litterally = 0;
+		} else {
+			switch (c) {
+			case '\\':
+				take_next_litterally = 1;
+				continue;
+			case '(':
+				in = unquote_comment(outbuf, in);
+				continue;
+			case ')':
+				strbuf_addch(outbuf, ')');
+				return in;
+			}
+		}
+
+		strbuf_addch(outbuf, c);
+	}
+
+	return in;
+}
+
+static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
+{
+	int c;
+	int take_next_litterally = 0;
+
+	while ((c = *in++) != 0) {
+		if (take_next_litterally == 1) {
+			take_next_litterally = 0;
+		} else {
+			switch (c) {
+			case '\\':
+				take_next_litterally = 1;
+				continue;
+			case '"':
+				return in;
+			}
+		}
+
+		strbuf_addch(outbuf, c);
+	}
+
+	return in;
+}
+
+static void unquote_quoted_pair(struct strbuf *line)
+{
+	struct strbuf outbuf;
+	const char *in = line->buf;
+	int c;
+
+	strbuf_init(&outbuf, line->len);
+
+	while ((c = *in++) != 0) {
+		switch (c) {
+		case '"':
+			in = unquote_quoted_string(&outbuf, in);
+			continue;
+		case '(':
+			in = unquote_comment(&outbuf, in);
+			continue;
+		}
+
+		strbuf_addch(&outbuf, c);
+	}
+
+	strbuf_swap(&outbuf, line);
+	strbuf_release(&outbuf);
+
+}
+
 static void handle_from(struct mailinfo *mi, const struct strbuf *from)
 {
 	char *at;
@@ -63,6 +143,8 @@ static void handle_from(struct mailinfo *mi, const struct strbuf *from)
 	strbuf_init(&f, from->len);
 	strbuf_addbuf(&f, from);
 
+	unquote_quoted_pair(&f);
+
 	at = strchr(f.buf, '@');
 	if (!at) {
 		parse_bogus_from(mi, from);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index c4ed0f4..3e983c0 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -144,4 +144,18 @@ test_expect_success 'mailinfo unescapes with --mboxrd' '
 	test_cmp expect mboxrd/msg
 '
 
+test_expect_success 'mailinfo handles rfc2822 quoted-string' '
+	mkdir quoted-string &&
+	git mailinfo /dev/null /dev/null <"$DATA"/quoted-string.in \
+		>quoted-string/info &&
+	test_cmp "$DATA"/quoted-string.expect quoted-string/info
+'
+
+test_expect_success 'mailinfo handles rfc2822 comment' '
+	mkdir comment &&
+	git mailinfo /dev/null /dev/null <"$DATA"/comment.in \
+		>comment/info &&
+	test_cmp "$DATA"/comment.expect comment/info
+'
+
 test_done
diff --git a/t/t5100/comment.expect b/t/t5100/comment.expect
new file mode 100644
index 0000000..7228177
--- /dev/null
+++ b/t/t5100/comment.expect
@@ -0,0 +1,5 @@
+Author: A U Thor (this is (really) a comment (honestly))
+Email: somebody@example.com
+Subject: testing comments
+Date: Sun, 25 May 2008 00:38:18 -0700
+
diff --git a/t/t5100/comment.in b/t/t5100/comment.in
new file mode 100644
index 0000000..c53a192
--- /dev/null
+++ b/t/t5100/comment.in
@@ -0,0 +1,9 @@
+From 1234567890123456789012345678901234567890 Mon Sep 17 00:00:00 2001
+From: "A U Thor" <somebody@example.com> (this is \(really\) a comment (honestly))
+Date: Sun, 25 May 2008 00:38:18 -0700
+Subject: [PATCH] testing comments
+
+
+
+---
+patch
diff --git a/t/t5100/quoted-string.expect b/t/t5100/quoted-string.expect
new file mode 100644
index 0000000..cab1bce
--- /dev/null
+++ b/t/t5100/quoted-string.expect
@@ -0,0 +1,5 @@
+Author: Author "The Author" Name
+Email: somebody@example.com
+Subject: testing quoted-pair
+Date: Sun, 25 May 2008 00:38:18 -0700
+
diff --git a/t/t5100/quoted-string.in b/t/t5100/quoted-string.in
new file mode 100644
index 0000000..e2e627a
--- /dev/null
+++ b/t/t5100/quoted-string.in
@@ -0,0 +1,9 @@
+From 1234567890123456789012345678901234567890 Mon Sep 17 00:00:00 2001
+From: "Author \"The Author\" Name" <somebody@example.com>
+Date: Sun, 25 May 2008 00:38:18 -0700
+Subject: [PATCH] testing quoted-pair
+
+
+
+---
+patch
-- 
2.10.0.89.ge802c3a.dirty


  reply	other threads:[~2016-09-25 21:08 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-16 21:02 [PATCH] mailinfo: unescape quoted-pair in header fields Kevin Daudt
2016-09-16 22:22 ` Jeff King
2016-09-19 10:51   ` Kevin Daudt
2016-09-20  3:57     ` Jeff King
2016-09-21 16:07       ` Junio C Hamano
2016-09-19 18:54 ` [PATCH v2 0/2] Handle escape characters in From field Kevin Daudt
2016-09-25 21:08   ` [PATCH v3 1/2] t5100-mailinfo: replace common path prefix with variable Kevin Daudt
2016-09-25 21:08     ` Kevin Daudt [this message]
2016-09-26 19:11       ` [PATCH v3 2/2] mailinfo: unescape quoted-pair in header fields Junio C Hamano
2016-09-26 19:26         ` Junio C Hamano
2016-09-26 19:44           ` Kevin Daudt
2016-09-26 22:23             ` Junio C Hamano
2016-09-27 10:26               ` Kevin Daudt
2016-09-26 19:06     ` [PATCH v3 1/2] t5100-mailinfo: replace common path prefix with variable Junio C Hamano
2016-09-28 19:49     ` [PATCH v4 0/2] Handle RFC2822 quoted-pairs in From header Kevin Daudt
2016-09-28 19:52       ` [PATCH v4 1/2] t5100-mailinfo: replace common path prefix with variable Kevin Daudt
2016-09-28 20:21         ` Junio C Hamano
2016-09-28 20:27           ` Kevin Daudt
2016-09-28 19:52       ` [PATCH v4 2/2] mailinfo: unescape quoted-pair in header fields Kevin Daudt
2016-09-19 18:54 ` [PATCH v2 1/2] t5100-mailinfo: replace common path prefix with variable Kevin Daudt
2016-09-19 21:16   ` Junio C Hamano
2016-09-20  3:59     ` Jeff King
2016-09-19 18:54 ` [PATCH v2 2/2] mailinfo: unescape quoted-pair in header fields Kevin Daudt
2016-09-19 21:24   ` Junio C Hamano
2016-09-19 22:04     ` Junio C Hamano
2016-09-20  4:28   ` Jeff King
2016-09-21 11:09   ` Jeff King
2016-09-22 22:17     ` Junio C Hamano
2016-09-23  4:15       ` Jeff King
2016-09-25 20:17         ` Kevin Daudt
2016-09-25 22:38           ` Jakub Narębski
2016-09-26  5:02             ` Kevin Daudt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160925210808.26424-2-me@ikke.info \
    --to=me@ikke.info \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    --cc=swiftgeek@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).