git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>, "Jeff King" <peff@peff.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 6/9] pretty: two phase conversion for non utf-8 commits
Date: Sun, 23 Sep 2012 16:10:30 +0700	[thread overview]
Message-ID: <1348391433-11300-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1348391433-11300-1-git-send-email-pclouds@gmail.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 5420 bytes --]

Always assume format_commit_item() takes a UTF-8 string, for
simplicity. If the commit message is not in UTF-8, or the output
encoding is not UTF-8, the commit is first converted to UTF-8,
processed, and then the output is converted to the output encoding.

This of course only works with encodings that are compatible with
Unicode.

This also fixes the iso8859-1 test. It's supposed to create an
iso8859-1 commit, but the commit content in t6006-rev-list-format.sh
is in UTF-8. Split the content out into a separate file (so its encoding
won't accidentally be converted) and convert it back to iso8859-1.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 pretty.c                     | 29 +++++++++++++++++++----------
 t/t6006-rev-list-format.sh   | 16 +++++-----------
 t/t6006/commit-msg.iso8859-1 |  5 +++++
 3 files changed, 29 insertions(+), 21 deletions(-)
 create mode 100644 t/t6006/commit-msg.iso8859-1

diff --git a/pretty.c b/pretty.c
index 45fe878..f3275a7 100644
--- a/pretty.c
+++ b/pretty.c
@@ -916,7 +916,8 @@ static size_t parse_color_placeholder(struct strbuf *sb,
 	return 0;
 }
 
-static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
+				const char *placeholder,
 				void *context)
 {
 	struct format_commit_context *c = context;
@@ -1121,7 +1122,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
 	return 0;	/* unknown placeholder */
 }
 
-static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
+				 const char *placeholder,
 				 void *context)
 {
 	struct format_commit_context *c = context;
@@ -1205,25 +1207,32 @@ void format_commit_message(const struct commit *commit,
 	struct format_commit_context context;
 	static const char utf8[] = "UTF-8";
 	const char *output_enc = pretty_ctx->output_encoding;
+	char *enc;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
 	context.pretty_ctx = pretty_ctx;
 	context.wrap_start = sb->len;
 	context.message = commit->buffer;
-	if (output_enc) {
-		char *enc = get_header(commit, "encoding");
-		if (strcmp(enc ? enc : utf8, output_enc)) {
-			context.message = logmsg_reencode(commit, output_enc);
-			if (!context.message)
-				context.message = commit->buffer;
-		}
-		free(enc);
+	enc = get_header(commit, "encoding");
+	if (enc && strcmp(utf8, enc)) {
+		context.message = reencode_string(context.message, utf8, enc);
+		if (!context.message)
+			context.message = commit->buffer;
 	}
+	free(enc);
 
 	strbuf_expand(sb, format, format_commit_item, &context);
 	rewrap_message_tail(sb, &context, 0, 0, 0);
 
+	if (output_enc && strcmp(utf8, output_enc)) {
+		char *out = reencode_string(sb->buf, output_enc, utf8);
+		if (out) {
+			int len = strlen(out);
+			strbuf_attach(sb, out, len, len + 1);
+		}
+	}
+
 	if (context.message != commit->buffer)
 		free(context.message);
 	free(context.signature.gpg_output);
diff --git a/t/t6006-rev-list-format.sh b/t/t6006-rev-list-format.sh
index f94f0c4..cd24839 100755
--- a/t/t6006-rev-list-format.sh
+++ b/t/t6006-rev-list-format.sh
@@ -124,27 +124,21 @@ commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 ^[[1;31;43mfoo^[[m
 EOF
 
-cat >commit-msg <<'EOF'
-Test printing of complex bodies
-
-This commit message is much longer than the others,
-and it will be encoded in iso8859-1. We should therefore
-include an iso8859 character: ¡bueno!
-EOF
+cat "$TEST_DIRECTORY/t6006/commit-msg.iso8859-1" >commit-msg
 test_expect_success 'setup complex body' '
 git config i18n.commitencoding iso8859-1 &&
   echo change2 >foo && git commit -a -F commit-msg
 '
 
 test_format complex-encoding %e <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 iso8859-1
 commit 131a310eb913d107dd3c09a65d1651175898735d
 commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 EOF
 
 test_format complex-subject %s <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 Test printing of complex bodies
 commit 131a310eb913d107dd3c09a65d1651175898735d
 changed foo
@@ -153,7 +147,7 @@ added foo
 EOF
 
 test_format complex-body %b <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 This commit message is much longer than the others,
 and it will be encoded in iso8859-1. We should therefore
 include an iso8859 character: ¡bueno!
@@ -163,7 +157,7 @@ commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 EOF
 
 test_expect_success '%x00 shows NUL' '
-	echo  >expect commit f58db70b055c5718631e5c61528b28b12090cdea &&
+	echo  >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
 	echo >>expect fooQbar &&
 	git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
 	nul_to_q <actual.nul >actual &&
diff --git a/t/t6006/commit-msg.iso8859-1 b/t/t6006/commit-msg.iso8859-1
new file mode 100644
index 0000000..f8fe808
--- /dev/null
+++ b/t/t6006/commit-msg.iso8859-1
@@ -0,0 +1,5 @@
+Test printing of complex bodies
+
+This commit message is much longer than the others,
+and it will be encoded in iso8859-1. We should therefore
+include an iso8859 character: ¡bueno!
-- 
1.7.12.1.406.g6ab07c4

  parent reply	other threads:[~2012-09-23  9:18 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-22  4:22 [PATCH 0/6] Advanced --oneline layout Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 1/6] pretty: share code between format_decoration and show_decorations Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 2/6] pretty: split parsing %C into a separate function Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 3/6] pretty: support %C(auto[,N]) to turn on coloring on next placeholder(s) Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 4/6] utf8.c: move display_mode_esc_sequence_len() for use by other functions Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 5/6] utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 6/6] pretty: support padding placeholders, %< %> and %<> Nguyễn Thái Ngọc Duy
2012-09-22  4:26 ` [PATCH 7/6] pretty: trim trailing spaces due to padding Nguyễn Thái Ngọc Duy
2012-09-23  9:10 ` [PATCH v2 0/9] Advanced --oneline layout Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 1/9] pretty: share code between format_decoration and show_decorations Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 2/9] pretty: split parsing %C into a separate function Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 3/9] pretty: support %C(auto[,N]) to turn on coloring on next placeholder(s) Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 4/9] utf8.c: move display_mode_esc_sequence_len() for use by other functions Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 5/9] utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` Nguyễn Thái Ngọc Duy [this message]
2012-09-23 13:54     ` [PATCH 6/9] pretty: two phase conversion for non utf-8 commits Robin Rosenberg
2012-09-24  1:21       ` Nguyen Thai Ngoc Duy
2012-09-23  9:10   ` [PATCH 7/9] pretty: support padding placeholders, %< %> and %>< Nguyễn Thái Ngọc Duy
2012-10-24  8:25     ` Jeff King
2012-09-23  9:10   ` [PATCH 8/9] pretty: support truncating in %>, %< " Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 9/9] pretty: support %>> that steal trailing spaces Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1348391433-11300-7-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).