git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Jan H. Schönherr" <schnhrr@cs.tu-berlin.de>
To: git@vger.kernel.org
Cc: "Jeff King" <peff@peff.net>,
	"Jan H. Schönherr" <schnhrr@cs.tu-berlin.de>
Subject: [PATCH 4/5] format-patch: fix rfc2047 address encoding with respect to rfc822 specials
Date: Mon,  8 Oct 2012 19:33:28 +0200	[thread overview]
Message-ID: <1349717609-4770-5-git-send-email-schnhrr@cs.tu-berlin.de> (raw)
In-Reply-To: <1349717609-4770-1-git-send-email-schnhrr@cs.tu-berlin.de>

From: Jan H. Schönherr <schnhrr@cs.tu-berlin.de>

According to RFC 2047 and RFC 822, rfc2047 encoded words and rfc822
quoted strings do not mix.

Be stricter about rfc2047 encoded words in addresses, so that the
generated headers conform more closely to RFC 2047.

(In particular, my own name is now correctly decoded as Jan H. Schönherr
(without quotes) and not as "Jan H. Schönherr" (with quotes).)

Signed-off-by: Jan H. Schönherr <schnhrr@cs.tu-berlin.de>
---
 pretty.c                | 80 ++++++++++++++++++++++++++++++++++++++-----------
 t/t4014-format-patch.sh | 11 +++++--
 2 Dateien geändert, 71 Zeilen hinzugefügt(+), 20 Zeilen entfernt(-)

diff --git a/pretty.c b/pretty.c
index ee76219..f3a7383 100644
--- a/pretty.c
+++ b/pretty.c
@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch)
 	}
 }
 
-static int has_rfc822_specials(const char *s, int len)
+static int needs_rfc822_quoting(const char *s, int len)
 {
 	int i;
 	for (i = 0; i < len; i++)
@@ -272,7 +272,12 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
 	strbuf_addch(out, '"');
 }
 
-static int is_rfc2047_special(char ch)
+enum rfc2047_type {
+	RFC2047_SUBJECT,
+	RFC2047_ADDRESS,
+};
+
+static int is_rfc2047_special(char ch, enum rfc2047_type type)
 {
 	/*
 	 * We encode ' ' using '=20' even though rfc2047
@@ -283,33 +288,62 @@ static int is_rfc2047_special(char ch)
 	if (ch == ' ' || ch == '\n')
 		return 1;
 
-	return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
+	if (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'))
+		return 1;
+
+	if (type != RFC2047_ADDRESS)
+		return 0;
+
+	/*
+	 * rfc2047, section 5.3:
+	 *
+	 *    As a replacement for a 'word' entity within a 'phrase', for example,
+	 *    one that precedes an address in a From, To, or Cc header.  The ABNF
+	 *    definition for 'phrase' from RFC 822 thus becomes:
+	 *
+	 *    phrase = 1*( encoded-word / word )
+	 *
+	 *    In this case the set of characters that may be used in a "Q"-encoded
+	 *    'encoded-word' is restricted to: <upper and lower case ASCII
+	 *    letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
+	 *    (underscore, ASCII 95.)>.  An 'encoded-word' that appears within a
+	 *    'phrase' MUST be separated from any adjacent 'word', 'text' or
+	 *    'special' by 'linear-white-space'.
+	 */
+
+	/* '=' and '_' are special cases and have been checked above */
+	return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
 }
 
-static void add_rfc2047(struct strbuf *sb, const char *line, int len,
-		       const char *encoding)
+static int needs_rfc2047_encoding(const char *line, int len,
+				  enum rfc2047_type type)
 {
-	static const int max_length = 76; /* per rfc2047 */
 	int i;
-	int line_len = last_line_length(sb);
 
 	for (i = 0; i < len; i++) {
 		int ch = line[i];
 		if (non_ascii(ch) || ch == '\n')
-			goto needquote;
+			return 1;
 		if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
-			goto needquote;
+			return 1;
 	}
-	strbuf_add_wrapped_bytes(sb, line, len, -line_len, 1, 78+1);
-	return;
 
-needquote:
+	return 0;
+}
+
+static void add_rfc2047(struct strbuf *sb, const char *line, int len,
+		       const char *encoding, enum rfc2047_type type)
+{
+	static const int max_length = 76; /* per rfc2047 */
+	int i;
+	int line_len = last_line_length(sb);
+
 	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
 	strbuf_addf(sb, "=?%s?q?", encoding);
 	line_len += strlen(encoding) + 5; /* 5 for =??q? */
 	for (i = 0; i < len; i++) {
 		unsigned ch = line[i] & 0xFF;
-		int is_special = is_rfc2047_special(ch);
+		int is_special = is_rfc2047_special(ch, type);
 
 		if (line_len + 2 + (is_special ? 3 : 1) > max_length) {
 			strbuf_addf(sb, "?=\n =?%s?q?", encoding);
@@ -355,13 +389,18 @@ void pp_user_info(const struct pretty_print_context *pp,
 			name_tail--;
 		display_name_length = name_tail - line;
 		strbuf_addstr(sb, "From: ");
-		if (!has_rfc822_specials(line, display_name_length)) {
-			add_rfc2047(sb, line, display_name_length, encoding);
-		} else {
+		if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) {
+			add_rfc2047(sb, line, display_name_length,
+						encoding, RFC2047_ADDRESS);
+		} else if (needs_rfc822_quoting(line, display_name_length)) {
 			struct strbuf quoted = STRBUF_INIT;
 			add_rfc822_quoted(&quoted, line, display_name_length);
-			add_rfc2047(sb, quoted.buf, quoted.len, encoding);
+			strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
+								-6, 1, 78+1);
 			strbuf_release(&quoted);
+		} else {
+			strbuf_add_wrapped_bytes(sb, line, display_name_length,
+								-6, 1, 78+1);
 		}
 		if (namelen - display_name_length + last_line_length(sb) > 78) {
 			strbuf_addch(sb, '\n');
@@ -1292,7 +1331,12 @@ void pp_title_line(const struct pretty_print_context *pp,
 	strbuf_grow(sb, title.len + 1024);
 	if (pp->subject) {
 		strbuf_addstr(sb, pp->subject);
-		add_rfc2047(sb, title.buf, title.len, encoding);
+		if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT))
+			add_rfc2047(sb, title.buf, title.len, encoding,
+				    RFC2047_SUBJECT);
+		else
+			strbuf_add_wrapped_bytes(sb, title.buf, title.len,
+						 -last_line_length(sb), 1, 78+1);
 	} else {
 		strbuf_addbuf(sb, &title);
 	}
diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index 1d5636d..1a3b6e8 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -830,9 +830,16 @@ test_expect_success 'format-patch quotes double-quote in headers' '
 '
 
 cat >expect <<'EOF'
-From: =?UTF-8?q?"F=C3=B6o=20B.=20Bar"?= <author@example.com>
+From: =?UTF-8?q?F=C3=B6o=20Bar?= <author@example.com>
 EOF
-test_expect_success 'rfc2047-encoded headers also double-quote 822 specials' '
+test_expect_success 'format-patch uses rfc2047-encoded headers when necessary' '
+	check_author "Föo Bar"
+'
+
+cat >expect <<'EOF'
+From: =?UTF-8?q?F=C3=B6o=20B=2E=20Bar?= <author@example.com>
+EOF
+test_expect_success 'rfc2047-encoded headers leave no rfc822 specials' '
 	check_author "Föo B. Bar"
 '
 
-- 
1.7.12

  parent reply	other threads:[~2012-10-08 17:40 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-08 17:33 [PATCH 0/5] Cure some format-patch wrapping and encoding issues Jan H. Schönherr
2012-10-08 17:33 ` [PATCH 1/5] format-patch: do not wrap non-rfc2047 headers too early Jan H. Schönherr
2012-10-08 17:33 ` [PATCH 2/5] format-patch: do not wrap rfc2047 encoded headers too late Jan H. Schönherr
     [not found]   ` <7v7gqzfnpj.fsf@alter.siamese.dyndns.org>
2012-10-10  9:31     ` "Jan H. Schönherr"
2012-10-08 17:33 ` [PATCH 3/5] format-patch: introduce helper function last_line_length() Jan H. Schönherr
2012-10-08 17:33 ` Jan H. Schönherr [this message]
2012-10-08 17:33 ` [PATCH 5/5] format-patch: tests: check rfc822+rfc2047 in to+cc headers Jan H. Schönherr
     [not found]   ` <7v391nfmzn.fsf@alter.siamese.dyndns.org>
2012-10-10 10:44     ` "Jan H. Schönherr"
2012-10-10 17:02       ` Junio C Hamano
     [not found] ` <7vfw5nfoq9.fsf@alter.siamese.dyndns.org>
2012-10-10 10:49   ` [PATCH 0/5] Cure some format-patch wrapping and encoding issues "Jan H. Schönherr"

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1349717609-4770-5-git-send-email-schnhrr@cs.tu-berlin.de \
    --to=schnhrr@cs.tu-berlin.de \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).