From: Johannes Schindelin <johannes.schindelin@gmx.de>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>, "Jeff King" <peff@peff.net>,
"Benjamin Kramer" <benny.kra@googlemail.com>,
"René Scharfe" <l.s.r@web.de>
Subject: [PATCH v4 3/3] regex: use regexec_buf()
Date: Wed, 21 Sep 2016 20:24:14 +0200 (CEST) [thread overview]
Message-ID: <53f3609d99c865d59d7bfd8219a5334339e9e6bc.1474482164.git.johannes.schindelin@gmx.de> (raw)
In-Reply-To: <cover.1474482164.git.johannes.schindelin@gmx.de>
The new regexec_buf() function operates on buffers with an explicitly
specified length, rather than NUL-terminated strings.
We need to use this function whenever the buffer we want to pass to
regexec() may have been mmap()ed (and is hence not NUL-terminated).
Note: the original motivation for this patch was to fix a bug where
`git diff -G <regex>` would crash. This patch converts more callers,
though, some of which explicitly allocated and constructed
NUL-terminated strings (or worse: modified read-only buffers to insert
NULs).
Some of the buffers actually may be NUL-terminated. As regexec_buf()
uses REG_STARTEND where available, but has to fall back to allocating
and constructing NUL-terminated strings where REG_STARTEND is not
available, this makes the code less efficient in the latter case.
However, given the widespread support for REG_STARTEND, combined with
the improved ease of code maintenance, we strike the balance in favor
of REG_STARTEND.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
diff.c | 3 ++-
diffcore-pickaxe.c | 18 ++++++++----------
grep.c | 14 ++------------
t/t4061-diff-pickaxe.sh | 2 +-
xdiff-interface.c | 13 ++++---------
5 files changed, 17 insertions(+), 33 deletions(-)
diff --git a/diff.c b/diff.c
index c6da383..fb99235 100644
--- a/diff.c
+++ b/diff.c
@@ -952,7 +952,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
{
if (word_regex && *begin < buffer->size) {
regmatch_t match[1];
- if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+ if (!regexec_buf(word_regex, buffer->ptr + *begin,
+ buffer->size - *begin, 1, match, 0)) {
char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
'\n', match[0].rm_eo - match[0].rm_so);
*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 55067ca..9795ca1 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -23,7 +23,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
{
struct diffgrep_cb *data = priv;
regmatch_t regmatch;
- int hold;
if (line[0] != '+' && line[0] != '-')
return;
@@ -33,11 +32,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
* caller early.
*/
return;
- /* Yuck -- line ought to be "const char *"! */
- hold = line[len];
- line[len] = '\0';
- data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
- line[len] = hold;
+ data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
+ &regmatch, 0);
}
static int diff_grep(mmfile_t *one, mmfile_t *two,
@@ -50,9 +46,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
xdemitconf_t xecfg;
if (!one)
- return !regexec(regexp, two->ptr, 1, &regmatch, 0);
+ return !regexec_buf(regexp, two->ptr, two->size,
+ 1, &regmatch, 0);
if (!two)
- return !regexec(regexp, one->ptr, 1, &regmatch, 0);
+ return !regexec_buf(regexp, one->ptr, one->size,
+ 1, &regmatch, 0);
/*
* We have both sides; need to run textual diff and see if
@@ -83,8 +81,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
regmatch_t regmatch;
int flags = 0;
- assert(data[sz] == '\0');
- while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
+ while (*data &&
+ !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
flags |= REG_NOTBOL;
data += regmatch.rm_eo;
if (*data && regmatch.rm_so == regmatch.rm_eo)
diff --git a/grep.c b/grep.c
index d7d00b8..1194d35 100644
--- a/grep.c
+++ b/grep.c
@@ -898,17 +898,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol,
}
}
-static int regmatch(const regex_t *preg, char *line, char *eol,
- regmatch_t *match, int eflags)
-{
-#ifdef REG_STARTEND
- match->rm_so = 0;
- match->rm_eo = eol - line;
- eflags |= REG_STARTEND;
-#endif
- return regexec(preg, line, 1, match, eflags);
-}
-
static int patmatch(struct grep_pat *p, char *line, char *eol,
regmatch_t *match, int eflags)
{
@@ -919,7 +908,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
else if (p->pcre_regexp)
hit = !pcrematch(p, line, eol, match, eflags);
else
- hit = !regmatch(&p->regexp, line, eol, match, eflags);
+ hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
+ eflags);
return hit;
}
diff --git a/t/t4061-diff-pickaxe.sh b/t/t4061-diff-pickaxe.sh
index 5929f2e..f0bf50b 100755
--- a/t/t4061-diff-pickaxe.sh
+++ b/t/t4061-diff-pickaxe.sh
@@ -14,7 +14,7 @@ test_expect_success setup '
test_tick &&
git commit -m "A 4k file"
'
-test_expect_failure '-G matches' '
+test_expect_success '-G matches' '
git diff --name-only -G "^0{4096}$" HEAD^ >out &&
test 4096-zeroes.txt = "$(cat out)"
'
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 3bfc69c..060038c 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -214,11 +214,10 @@ struct ff_regs {
static long ff_regexp(const char *line, long len,
char *buffer, long buffer_size, void *priv)
{
- char *line_buffer;
struct ff_regs *regs = priv;
regmatch_t pmatch[2];
int i;
- int result = -1;
+ int result;
/* Exclude terminating newline (and cr) from matching */
if (len > 0 && line[len-1] == '\n') {
@@ -228,18 +227,16 @@ static long ff_regexp(const char *line, long len,
len--;
}
- line_buffer = xstrndup(line, len); /* make NUL terminated */
-
for (i = 0; i < regs->nr; i++) {
struct ff_reg *reg = regs->array + i;
- if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) {
+ if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
if (reg->negate)
- goto fail;
+ return -1;
break;
}
}
if (regs->nr <= i)
- goto fail;
+ return -1;
i = pmatch[1].rm_so >= 0 ? 1 : 0;
line += pmatch[i].rm_so;
result = pmatch[i].rm_eo - pmatch[i].rm_so;
@@ -248,8 +245,6 @@ static long ff_regexp(const char *line, long len,
while (result > 0 && (isspace(line[result - 1])))
result--;
memcpy(buffer, line, result);
- fail:
- free(line_buffer);
return result;
}
--
2.10.0.windows.1.10.g803177d
next prev parent reply other threads:[~2016-09-21 18:24 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-05 15:44 [PATCH 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-05 15:45 ` [PATCH 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-06 18:43 ` Jeff King
2016-09-08 7:53 ` Johannes Schindelin
2016-09-05 15:45 ` [PATCH 2/3] diff_populate_filespec: NUL-terminate buffers Johannes Schindelin
2016-09-06 7:06 ` Jeff King
2016-09-06 16:02 ` Johannes Schindelin
2016-09-06 18:41 ` Jeff King
2016-09-07 18:31 ` Junio C Hamano
2016-09-08 7:52 ` Johannes Schindelin
2016-09-08 7:49 ` Johannes Schindelin
2016-09-08 8:22 ` Jeff King
2016-09-08 16:57 ` Junio C Hamano
2016-09-08 18:22 ` Johannes Schindelin
2016-09-08 18:48 ` Jeff King
2016-09-05 15:45 ` [PATCH 3/3] diff_grep: add assertions verifying that the buffers are NUL-terminated Johannes Schindelin
2016-09-06 7:08 ` Jeff King
2016-09-06 16:04 ` Johannes Schindelin
2016-09-05 19:10 ` [PATCH 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Junio C Hamano
2016-09-06 7:12 ` Jeff King
2016-09-06 14:06 ` Johannes Schindelin
2016-09-06 18:29 ` Jeff King
2016-09-08 7:29 ` Johannes Schindelin
2016-09-08 8:00 ` Jeff King
2016-09-09 10:09 ` Johannes Schindelin
2016-09-09 17:46 ` Junio C Hamano
2016-09-06 13:21 ` Johannes Schindelin
2016-09-06 6:58 ` Jeff King
2016-09-06 14:13 ` Johannes Schindelin
2016-09-08 7:31 ` [PATCH v2 " Johannes Schindelin
2016-09-08 7:31 ` [PATCH v2 2/3] Introduce a function to run regexec() on non-NUL-terminated buffers Johannes Schindelin
2016-09-08 8:04 ` Jeff King
2016-09-09 9:45 ` Johannes Schindelin
2016-09-09 9:59 ` Jeff King
2016-09-08 7:31 ` [PATCH v2 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-08 7:31 ` [PATCH v2 3/3] Use the newly-introduced regexec_buf() function Johannes Schindelin
2016-09-08 7:54 ` Johannes Schindelin
2016-09-08 8:10 ` Jeff King
2016-09-08 8:14 ` Jeff King
2016-09-08 8:35 ` Jeff King
2016-09-08 19:06 ` Ramsay Jones
2016-09-08 19:53 ` Jeff King
2016-09-08 21:30 ` Junio C Hamano
2016-09-08 7:33 ` [PATCH v2 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-08 8:13 ` Jeff King
2016-09-08 7:57 ` [PATCH v3 " Johannes Schindelin
2016-09-08 7:57 ` [PATCH v3 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-08 7:58 ` [PATCH v3 2/3] Introduce a function to run regexec() on non-NUL-terminated buffers Johannes Schindelin
2016-09-08 17:03 ` Junio C Hamano
2016-09-08 7:59 ` [PATCH v3 3/3] Use the newly-introduced regexec_buf() function Johannes Schindelin
2016-09-08 17:09 ` Junio C Hamano
2016-09-09 9:52 ` Johannes Schindelin
2016-09-09 9:57 ` Jeff King
2016-09-09 10:41 ` Johannes Schindelin
2016-09-09 17:49 ` Junio C Hamano
2016-09-21 18:23 ` [PATCH v4 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-21 18:23 ` [PATCH v4 1/3] regex: -G<pattern> feeds a non NUL-terminated string to regexec() and fails Johannes Schindelin
2016-09-21 18:24 ` [PATCH v4 2/3] regex: add regexec_buf() that can work on a non NUL-terminated string Johannes Schindelin
2016-09-21 19:17 ` Junio C Hamano
2016-09-22 18:38 ` Johannes Schindelin
2016-09-21 18:24 ` Johannes Schindelin [this message]
2016-09-21 19:18 ` [PATCH v4 3/3] regex: use regexec_buf() Junio C Hamano
2016-09-21 20:09 ` Junio C Hamano
2016-09-21 22:03 ` Jeff King
2016-09-25 14:01 ` Johannes Schindelin
2016-09-21 22:04 ` [PATCH v4 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=53f3609d99c865d59d7bfd8219a5334339e9e6bc.1474482164.git.johannes.schindelin@gmx.de \
--to=johannes.schindelin@gmx.de \
--cc=benny.kra@googlemail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=l.s.r@web.de \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).