From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
peff@peff.net, gitster@pobox.com
Subject: [RFC/PATCH 1/3] mailinfo: refactor commit message processing
Date: Fri, 16 Sep 2016 10:37:22 -0700 [thread overview]
Message-ID: <7dbb4bc0659056211b27f0033c73f0d558efdb54.1474047135.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1474047135.git.jonathantanmy@google.com>
In-Reply-To: <cover.1474047135.git.jonathantanmy@google.com>
Within the processing of the commit message, check for a scissors line
or a patchbreak line first (before checking for in-body headers) so that
a subsequent patch modifying the processing of in-body headers would not
cause a scissors line or patchbreak line to be misidentified.
If a line could be both an in-body header and a scissors line (for
example, "From: -- >8 --"), this is considered a fatal error
(previously, it would be interpreted as an in-body header). (It is not
possible for a line to be both an in-body header and a patchbreak line,
since the two require different prefixes.)
The following enumeration shows that processing is the same except for
the in-body header + scissors line case (as described above).
 o in-body header (check_header OK)
   o passes UTF-8 conversion
     o [described above] is scissors line
     o [not possible] is patchbreak line
     o [not possible] is blank line
     o is none of the above - processed as header
   o fails UTF-8 conversion - processed as header
 o not in-body header
   o passes UTF-8 conversion
     o is scissors line - processed as scissors
     o is patchbreak line - processed as patchbreak
     o is blank line - ignored if in header_stage
     o is none of the above - log message
   o fails UTF-8 conversion - input error
As for the result left in "line" (after the invocation of
handle_commit_msg), it is unused (by its caller, handle_filter, and by
handle_filter's callers, handle_boundary and handle_body) unless this
line is a patchbreak line, in which case handle_patch is subsequently
called (in handle_filter) on "line". In this case, "line" must have
passed UTF-8 conversion both before and after this patch, so the result
is still the same overall.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
mailinfo.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 115 insertions(+), 30 deletions(-)
diff --git a/mailinfo.c b/mailinfo.c
index e19abe3..23a56c2 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -340,23 +340,56 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
return out;
}
-static int convert_to_utf8(struct mailinfo *mi,
- struct strbuf *line, const char *charset)
+/*
+ * Attempts to convert line into UTF-8, storing the result in line.
+ *
+ * This differs from convert_to_utf8 in that conversion non-success is not
+ * considered an error case - mi->input_error is not set, and no error message
+ * is printed.
+ *
+ * If the conversion is unnecessary, returns 0 and stores NULL in old_buf (if
+ * old_buf is not NULL).
+ *
+ * If the conversion is successful, returns 0 and stores the unconverted string
+ * in old_buf and old_len (if they are respectively not NULL).
+ *
+ * If the conversion is unsuccessful, returns -1.
+ */
+static int try_convert_to_utf8(const struct mailinfo *mi, struct strbuf *line,
+ const char *charset, char **old_buf,
+ size_t *old_len)
{
- char *out;
+ char *utf8;
- if (!mi->metainfo_charset || !charset || !*charset)
+ if (!mi->metainfo_charset || !charset || !*charset ||
+ same_encoding(mi->metainfo_charset, charset)) {
+ if (old_buf)
+ *old_buf = NULL;
return 0;
+ }
- if (same_encoding(mi->metainfo_charset, charset))
+ utf8 = reencode_string(line->buf, mi->metainfo_charset, charset);
+ if (utf8) {
+ char *temp = strbuf_detach(line, old_len);
+ if (old_buf)
+ *old_buf = temp;
+ strbuf_attach(line, utf8, strlen(utf8), strlen(utf8));
return 0;
- out = reencode_string(line->buf, mi->metainfo_charset, charset);
- if (!out) {
+ }
+ return -1;
+}
+
+/*
+ * Converts line into UTF-8, setting mi->input_error to -1 upon failure.
+ */
+static int convert_to_utf8(struct mailinfo *mi,
+ struct strbuf *line, const char *charset)
+{
+ if (try_convert_to_utf8(mi, line, charset, NULL, NULL)) {
mi->input_error = -1;
return error("cannot convert from %s to %s",
charset, mi->metainfo_charset);
}
- strbuf_attach(line, out, strlen(out), strlen(out));
return 0;
}
@@ -515,6 +548,13 @@ static int check_header(struct mailinfo *mi,
return ret;
}
+static int check_header_raw(struct mailinfo *mi,
+ char *buf, size_t len,
+ struct strbuf *hdr_data[], int overwrite) {
+ const struct strbuf sb = {0, len, buf};
+ return check_header(mi, &sb, hdr_data, overwrite);
+}
+
static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
{
struct strbuf *ret;
@@ -623,32 +663,48 @@ static int is_scissors_line(const struct strbuf *line)
gap * 2 < perforation);
}
-static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+static int resembles_rfc2822_header(const struct strbuf *line)
{
- assert(!mi->filter_stage);
+ char *c;
- if (mi->header_stage) {
- if (!line->len || (line->len == 1 && line->buf[0] == '\n'))
+ if (!isalpha(line->buf[0]))
+ return 0;
+
+ for (c = line->buf + 1; *c != 0; c++) {
+ if (*c == ':')
+ return 1;
+ else if (*c != '-' && !isalpha(*c))
return 0;
}
+ return 0;
+}
- if (mi->use_inbody_headers && mi->header_stage) {
- mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0);
- if (mi->header_stage)
- return 0;
- } else
- /* Only trim the first (blank) line of the commit message
- * when ignoring in-body headers.
- */
- mi->header_stage = 0;
+static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+{
+ int ret = 0;
+ int utf8_result;
+ char *old_buf;
+ size_t old_len;
+
+ assert(!mi->filter_stage);
- /* normalize the log message to UTF-8. */
- if (convert_to_utf8(mi, line, mi->charset.buf))
- return 0; /* mi->input_error already set */
+ /*
+ * Obtain UTF8 for scissors line and patchbreak checks, but retain the
+ * undecoded line in case we need to process it as an in-body header.
+ */
+ utf8_result = try_convert_to_utf8(mi, line, mi->charset.buf, &old_buf,
+ &old_len);
- if (mi->use_scissors && is_scissors_line(line)) {
+ if (!utf8_result && mi->use_scissors && is_scissors_line(line)) {
int i;
+ if (resembles_rfc2822_header(line))
+ /*
+ * Explicitly reject scissor lines that resemble a RFC
+ * 2822 header, to avoid being prone to error.
+ */
+ die("scissors line resembles RFC 2822 header");
+
strbuf_setlen(&mi->log_message, 0);
mi->header_stage = 1;
@@ -661,18 +717,47 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
strbuf_release(mi->s_hdr_data[i]);
mi->s_hdr_data[i] = NULL;
}
- return 0;
+ goto handle_commit_msg_out;
}
-
- if (patchbreak(line)) {
+ if (!utf8_result && patchbreak(line)) {
if (mi->message_id)
strbuf_addf(&mi->log_message,
"Message-Id: %s\n", mi->message_id);
- return 1;
+ ret = 1;
+ goto handle_commit_msg_out;
}
+ if (mi->header_stage) {
+ char *buf = old_buf ? old_buf : line->buf;
+ if (buf[0] == 0 || (buf[0] == '\n' && buf[1] == 0))
+ goto handle_commit_msg_out;
+ }
+
+ if (mi->use_inbody_headers && mi->header_stage) {
+ char *buf = old_buf ? old_buf : line->buf;
+ size_t len = old_buf ? old_len : line->len;
+ mi->header_stage = check_header_raw(mi, buf, len,
+ mi->s_hdr_data, 0);
+ if (mi->header_stage)
+ goto handle_commit_msg_out;
+ } else
+ /* Only trim the first (blank) line of the commit message
+ * when ignoring in-body headers.
+ */
+ mi->header_stage = 0;
+
+ /* If adding as a log message, conversion to UTF-8 is required. */
+ if (utf8_result) {
+ mi->input_error = -1;
+ error("cannot convert from %s to %s",
+ mi->charset.buf, mi->metainfo_charset);
+ goto handle_commit_msg_out;
+ }
strbuf_addbuf(&mi->log_message, line);
- return 0;
+
+handle_commit_msg_out:
+ free(old_buf);
+ return ret;
}
static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
--
2.10.0.rc2.20.g5b18e70
next prev parent reply other threads:[~2016-09-16 17:37 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-02 19:58 [PATCH] sequencer: support folding in rfc2822 footer Jonathan Tan
2016-09-03 2:23 ` Junio C Hamano
2016-09-06 22:08 ` Jonathan Tan
2016-09-06 23:30 ` Jonathan Tan
2016-09-07 6:38 ` Jeff King
2016-09-16 17:37 ` [RFC/PATCH 0/3] handle multiline in-body headers Jonathan Tan
2016-09-16 18:29 ` Junio C Hamano
2016-09-16 17:37 ` Jonathan Tan [this message]
2016-09-16 19:12 ` [RFC/PATCH 1/3] mailinfo: refactor commit message processing Junio C Hamano
2016-09-16 21:46 ` Jeff King
2016-09-16 17:37 ` [RFC/PATCH 2/3] mailinfo: correct malformed test example Jonathan Tan
2016-09-16 19:19 ` Junio C Hamano
2016-09-16 22:42 ` Jonathan Tan
2016-09-16 22:55 ` Junio C Hamano
2016-09-17 0:31 ` Jonathan Tan
2016-09-17 3:48 ` Junio C Hamano
2016-09-16 17:37 ` [RFC/PATCH 3/3] mailinfo: handle in-body header continuations Jonathan Tan
2016-09-16 20:17 ` Junio C Hamano
2016-09-16 20:49 ` Jonathan Tan
2016-09-16 20:59 ` Junio C Hamano
2016-09-16 22:36 ` Jonathan Tan
2016-09-16 23:04 ` Junio C Hamano
2016-09-17 0:22 ` Jonathan Tan
2016-09-16 21:51 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7dbb4bc0659056211b27f0033c73f0d558efdb54.1474047135.git.jonathantanmy@google.com \
--to=jonathantanmy@google.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).