From: Luke Shumaker <lukeshu@lukeshu.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Elijah Newren" <newren@gmail.com>, "Jeff King" <peff@peff.net>,
"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
"Taylor Blau" <me@ttaylorr.com>,
"brian m . carlson" <sandals@crustytoothpaste.net>,
"Eric Sunshine" <sunshine@sunshineco.com>,
"Luke Shumaker" <lukeshu@datawire.io>
Subject: [PATCH v4 4/5] fast-export: do not modify memory from get_commit_buffer
Date: Fri, 30 Apr 2021 17:25:36 -0600 [thread overview]
Message-ID: <20210430232537.1131641-5-lukeshu@lukeshu.com> (raw)
In-Reply-To: <20210430232537.1131641-1-lukeshu@lukeshu.com>
From: Luke Shumaker <lukeshu@datawire.io>
fast-export's helper function find_encoding() takes a `const char *`, but
modifies that memory despite the `const`. Ultimately, this memory came
from get_commit_buffer(), and you're not supposed to modify the memory
that you get from get_commit_buffer().
So, get rid of find_encoding() in favor of commit.h:find_commit_header(),
which gives back a string length, rather than mutating the memory to
insert a '\0' terminator.
Because find_commit_header() detects the "\n\n" string that separates the
headers and the commit message, move the call to be above the
`message = strstr(..., "\n\n")` call. This helps readability, and lets the
end of the `encoding` header (when one is present) serve as the starting
point (the "..." argument) of that search, so that the same memory doesn't
need to be scanned twice. Introduce a
`commit_buffer_cursor` variable to avoid writing an awkward
`encoding ? encoding + encoding_len : committer_end` expression.
Signed-off-by: Luke Shumaker <lukeshu@datawire.io>
---
Notes:
v4: This commit is new in v4.
builtin/fast-export.c | 65 ++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 28 deletions(-)
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index d1cb8a3183..81f3fb1f05 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -499,21 +499,6 @@ static void show_filemodify(struct diff_queue_struct *q,
}
}
-static const char *find_encoding(const char *begin, const char *end)
-{
- const char *needle = "\nencoding ";
- char *bol, *eol;
-
- bol = memmem(begin, end ? end - begin : strlen(begin),
- needle, strlen(needle));
- if (!bol)
- return NULL;
- bol += strlen(needle);
- eol = strchrnul(bol, '\n');
- *eol = '\0';
- return bol;
-}
-
static char *anonymize_ref_component(void *data)
{
static int counter;
@@ -615,13 +600,26 @@ static void anonymize_ident_line(const char **beg, const char **end)
*end = out->buf + out->len;
}
+static char *reencode_message(const char *in_msg,
+ const char *in_encoding, size_t in_encoding_len)
+{
+ static struct strbuf in_encoding_buf = STRBUF_INIT;
+
+ strbuf_reset(&in_encoding_buf);
+ strbuf_add(&in_encoding_buf, in_encoding, in_encoding_len);
+
+ return reencode_string(in_msg, "UTF-8", in_encoding_buf.buf);
+}
+
static void handle_commit(struct commit *commit, struct rev_info *rev,
struct string_list *paths_of_changed_objects)
{
int saved_output_format = rev->diffopt.output_format;
- const char *commit_buffer;
+ const char *commit_buffer, *commit_buffer_cursor;
const char *author, *author_end, *committer, *committer_end;
- const char *encoding, *message;
+ const char *encoding;
+ size_t encoding_len;
+ const char *message;
char *reencoded = NULL;
struct commit_list *p;
const char *refname;
@@ -630,21 +628,31 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
parse_commit_or_die(commit);
- commit_buffer = get_commit_buffer(commit, NULL);
- author = strstr(commit_buffer, "\nauthor ");
+ commit_buffer_cursor = commit_buffer = get_commit_buffer(commit, NULL);
+
+ author = strstr(commit_buffer_cursor, "\nauthor ");
if (!author)
die("could not find author in commit %s",
oid_to_hex(&commit->object.oid));
author++;
- author_end = strchrnul(author, '\n');
- committer = strstr(author_end, "\ncommitter ");
+ commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+ committer = strstr(commit_buffer_cursor, "\ncommitter ");
if (!committer)
die("could not find committer in commit %s",
oid_to_hex(&commit->object.oid));
committer++;
- committer_end = strchrnul(committer, '\n');
- message = strstr(committer_end, "\n\n");
- encoding = find_encoding(committer_end, message);
+ commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+ /* find_commit_header() gets a `+ 1` because
+ * commit_buffer_cursor points at the trailing "\n" at the end
+ * of the previous line, but find_commit_header() wants a
+ * pointer to the beginning of the next line. */
+ encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+ if (encoding)
+ commit_buffer_cursor = encoding + encoding_len;
+
+ message = strstr(commit_buffer_cursor, "\n\n");
if (message)
message += 2;
@@ -685,14 +693,15 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
} else if (encoding) {
switch(reencode_mode) {
case REENCODE_YES:
- reencoded = reencode_string(message, "UTF-8", encoding);
+ reencoded = reencode_message(message, encoding, encoding_len);
break;
case REENCODE_NO:
break;
case REENCODE_ABORT:
- die("Encountered commit-specific encoding %s in commit "
+ die("Encountered commit-specific encoding %.*s in commit "
"%s; use --reencode=[yes|no] to handle it",
- encoding, oid_to_hex(&commit->object.oid));
+ (int)encoding_len, encoding,
+ oid_to_hex(&commit->object.oid));
}
}
if (!commit->parents)
@@ -704,7 +713,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
(int)(author_end - author), author,
(int)(committer_end - committer), committer);
if (!reencoded && encoding)
- printf("encoding %s\n", encoding);
+ printf("encoding %.*s\n", (int)encoding_len, encoding);
printf("data %u\n%s",
(unsigned)(reencoded
? strlen(reencoded) : message
--
2.31.1
next prev parent reply other threads:[~2021-04-30 23:26 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-22 0:27 [PATCH v2 0/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-22 0:27 ` [PATCH v2 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-22 0:27 ` [PATCH v2 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-22 3:59 ` Eric Sunshine
2021-04-22 4:43 ` Luke Shumaker
2021-04-22 4:50 ` Luke Shumaker
2021-04-22 0:27 ` [PATCH v2 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-23 16:41 ` [PATCH v3 0/3] " Luke Shumaker
2021-04-23 16:41 ` [PATCH v3 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-23 16:41 ` [PATCH v3 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-28 3:29 ` Junio C Hamano
2021-04-29 19:02 ` Luke Shumaker
2021-04-30 0:03 ` Junio C Hamano
2021-04-23 16:41 ` [PATCH v3 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-28 4:02 ` Junio C Hamano
2021-04-29 20:06 ` Luke Shumaker
2021-04-29 22:38 ` Elijah Newren
2021-04-29 23:42 ` Junio C Hamano
2021-04-30 2:23 ` Elijah Newren
2021-04-30 3:20 ` Junio C Hamano
2021-04-30 17:07 ` Luke Shumaker
2021-04-30 19:34 ` Luke Shumaker
2021-04-30 19:59 ` Elijah Newren
2021-04-30 22:21 ` Luke Shumaker
2021-04-30 23:25 ` [PATCH v4 0/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-04-30 23:25 ` [PATCH v4 1/5] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-30 23:25 ` [PATCH v4 2/5] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-30 23:25 ` [PATCH v4 3/5] git-fast-export.txt: clarify why 'verbatim' may not be a good idea Luke Shumaker
2021-04-30 23:25 ` Luke Shumaker [this message]
2021-05-03 4:41 ` [PATCH v4 4/5] fast-export: do not modify memory from get_commit_buffer Junio C Hamano
2021-04-30 23:25 ` [PATCH v4 5/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-05-03 5:09 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210430232537.1131641-5-lukeshu@lukeshu.com \
--to=lukeshu@lukeshu.com \
--cc=Johannes.Schindelin@gmx.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=lukeshu@datawire.io \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=pclouds@gmail.com \
--cc=peff@peff.net \
--cc=sandals@crustytoothpaste.net \
--cc=sunshine@sunshineco.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).