git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Johannes Schindelin <johannes.schindelin@gmx.de>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>
Subject: [PATCH v2 3/3] Use the newly-introduced regexec_buf() function
Date: Thu, 8 Sep 2016 09:31:19 +0200 (CEST)	[thread overview]
Message-ID: <324ecba64eb0436988aca846fb444eafda290d13.1473319844.git.johannes.schindelin@gmx.de> (raw)
In-Reply-To: <cover.1473319844.git.johannes.schindelin@gmx.de>

The new regexec_buf() function operates on buffers with an explicitly
specified length, rather than NUL-terminated strings.

We need to use this function whenever the buffer we want to pass to
regexec() may have been mmap()ed (and is hence not NUL-terminated).

Note: the original motivation for this patch was to fix a bug where
`git diff -G <regex>` would crash. This patch converts more callers,
though, some of which explicitly allocated and constructed
NUL-terminated strings (or worse: modified read-only buffers to insert
NULs).

Some of the buffers actually may be NUL-terminated. As regexec_buf()
uses REG_STARTEND where available, but has to fall back to allocating
and constructing NUL-terminated strings where REG_STARTEND is not
available, this makes the code less efficient in the latter case.

However, given the widespread support for REG_STARTEND, combined with
the improved ease of code maintenance, we strike the balance in favor
of REG_STARTEND.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 diff.c             |  3 ++-
 diffcore-pickaxe.c | 18 ++++++++----------
 xdiff-interface.c  | 13 ++++---------
 3 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/diff.c b/diff.c
index 534c12e..526775a 100644
--- a/diff.c
+++ b/diff.c
@@ -951,7 +951,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
 {
 	if (word_regex && *begin < buffer->size) {
 		regmatch_t match[1];
-		if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+		if (!regexec_buf(word_regex, buffer->ptr + *begin,
+				 buffer->size - *begin, 1, match, 0)) {
 			char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
 					'\n', match[0].rm_eo - match[0].rm_so);
 			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 55067ca..9795ca1 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -23,7 +23,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
 {
 	struct diffgrep_cb *data = priv;
 	regmatch_t regmatch;
-	int hold;
 
 	if (line[0] != '+' && line[0] != '-')
 		return;
@@ -33,11 +32,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
 		 * caller early.
 		 */
 		return;
-	/* Yuck -- line ought to be "const char *"! */
-	hold = line[len];
-	line[len] = '\0';
-	data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
-	line[len] = hold;
+	data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
+				 &regmatch, 0);
 }
 
 static int diff_grep(mmfile_t *one, mmfile_t *two,
@@ -50,9 +46,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
 	xdemitconf_t xecfg;
 
 	if (!one)
-		return !regexec(regexp, two->ptr, 1, &regmatch, 0);
+		return !regexec_buf(regexp, two->ptr, two->size,
+				    1, &regmatch, 0);
 	if (!two)
-		return !regexec(regexp, one->ptr, 1, &regmatch, 0);
+		return !regexec_buf(regexp, one->ptr, one->size,
+				    1, &regmatch, 0);
 
 	/*
 	 * We have both sides; need to run textual diff and see if
@@ -83,8 +81,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
 		regmatch_t regmatch;
 		int flags = 0;
 
-		assert(data[sz] == '\0');
-		while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
+		while (*data &&
+		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
 			flags |= REG_NOTBOL;
 			data += regmatch.rm_eo;
 			if (*data && regmatch.rm_so == regmatch.rm_eo)
diff --git a/xdiff-interface.c b/xdiff-interface.c
index f34ea76..50702a2 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -214,11 +214,10 @@ struct ff_regs {
 static long ff_regexp(const char *line, long len,
 		char *buffer, long buffer_size, void *priv)
 {
-	char *line_buffer;
 	struct ff_regs *regs = priv;
 	regmatch_t pmatch[2];
 	int i;
-	int result = -1;
+	int result;
 
 	/* Exclude terminating newline (and cr) from matching */
 	if (len > 0 && line[len-1] == '\n') {
@@ -228,18 +227,16 @@ static long ff_regexp(const char *line, long len,
 			len--;
 	}
 
-	line_buffer = xstrndup(line, len); /* make NUL terminated */
-
 	for (i = 0; i < regs->nr; i++) {
 		struct ff_reg *reg = regs->array + i;
-		if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) {
+		if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
 			if (reg->negate)
-				goto fail;
+				return -1;
 			break;
 		}
 	}
 	if (regs->nr <= i)
-		goto fail;
+		return -1;
 	i = pmatch[1].rm_so >= 0 ? 1 : 0;
 	line += pmatch[i].rm_so;
 	result = pmatch[i].rm_eo - pmatch[i].rm_so;
@@ -248,8 +245,6 @@ static long ff_regexp(const char *line, long len,
 	while (result > 0 && (isspace(line[result - 1])))
 		result--;
 	memcpy(buffer, line, result);
- fail:
-	free(line_buffer);
 	return result;
 }
 
-- 
2.10.0.windows.1.10.g803177d

  parent reply	other threads:[~2016-09-08  7:31 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-05 15:44 [PATCH 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-05 15:45 ` [PATCH 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-06 18:43   ` Jeff King
2016-09-08  7:53     ` Johannes Schindelin
2016-09-05 15:45 ` [PATCH 2/3] diff_populate_filespec: NUL-terminate buffers Johannes Schindelin
2016-09-06  7:06   ` Jeff King
2016-09-06 16:02     ` Johannes Schindelin
2016-09-06 18:41       ` Jeff King
2016-09-07 18:31         ` Junio C Hamano
2016-09-08  7:52           ` Johannes Schindelin
2016-09-08  7:49         ` Johannes Schindelin
2016-09-08  8:22           ` Jeff King
2016-09-08 16:57             ` Junio C Hamano
2016-09-08 18:22               ` Johannes Schindelin
2016-09-08 18:48               ` Jeff King
2016-09-05 15:45 ` [PATCH 3/3] diff_grep: add assertions verifying that the buffers are NUL-terminated Johannes Schindelin
2016-09-06  7:08   ` Jeff King
2016-09-06 16:04     ` Johannes Schindelin
2016-09-05 19:10 ` [PATCH 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Junio C Hamano
2016-09-06  7:12   ` Jeff King
2016-09-06 14:06     ` Johannes Schindelin
2016-09-06 18:29       ` Jeff King
2016-09-08  7:29         ` Johannes Schindelin
2016-09-08  8:00           ` Jeff King
2016-09-09 10:09             ` Johannes Schindelin
2016-09-09 17:46               ` Junio C Hamano
2016-09-06 13:21   ` Johannes Schindelin
2016-09-06  6:58 ` Jeff King
2016-09-06 14:13   ` Johannes Schindelin
2016-09-08  7:31 ` [PATCH v2 " Johannes Schindelin
2016-09-08  7:31   ` [PATCH v2 2/3] Introduce a function to run regexec() on non-NUL-terminated buffers Johannes Schindelin
2016-09-08  8:04     ` Jeff King
2016-09-09  9:45       ` Johannes Schindelin
2016-09-09  9:59         ` Jeff King
2016-09-08  7:31   ` [PATCH v2 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-08  7:31   ` Johannes Schindelin [this message]
2016-09-08  7:54     ` [PATCH v2 3/3] Use the newly-introduced regexec_buf() function Johannes Schindelin
2016-09-08  8:10       ` Jeff King
2016-09-08  8:14         ` Jeff King
2016-09-08  8:35           ` Jeff King
2016-09-08 19:06             ` Ramsay Jones
2016-09-08 19:53               ` Jeff King
2016-09-08 21:30                 ` Junio C Hamano
2016-09-08  7:33   ` [PATCH v2 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-08  8:13     ` Jeff King
2016-09-08  7:57   ` [PATCH v3 " Johannes Schindelin
2016-09-08  7:57     ` [PATCH v3 1/3] Demonstrate a problem: our pickaxe code assumes NUL-terminated buffers Johannes Schindelin
2016-09-08  7:58     ` [PATCH v3 2/3] Introduce a function to run regexec() on non-NUL-terminated buffers Johannes Schindelin
2016-09-08 17:03       ` Junio C Hamano
2016-09-08  7:59     ` [PATCH v3 3/3] Use the newly-introduced regexec_buf() function Johannes Schindelin
2016-09-08 17:09       ` Junio C Hamano
2016-09-09  9:52         ` Johannes Schindelin
2016-09-09  9:57           ` Jeff King
2016-09-09 10:41             ` Johannes Schindelin
2016-09-09 17:49           ` Junio C Hamano
2016-09-21 18:23     ` [PATCH v4 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Johannes Schindelin
2016-09-21 18:23       ` [PATCH v4 1/3] regex: -G<pattern> feeds a non NUL-terminated string to regexec() and fails Johannes Schindelin
2016-09-21 18:24       ` [PATCH v4 2/3] regex: add regexec_buf() that can work on a non NUL-terminated string Johannes Schindelin
2016-09-21 19:17         ` Junio C Hamano
2016-09-22 18:38           ` Johannes Schindelin
2016-09-21 18:24       ` [PATCH v4 3/3] regex: use regexec_buf() Johannes Schindelin
2016-09-21 19:18         ` Junio C Hamano
2016-09-21 20:09           ` Junio C Hamano
2016-09-21 22:03         ` Jeff King
2016-09-25 14:01           ` Johannes Schindelin
2016-09-21 22:04       ` [PATCH v4 0/3] Fix a segfault caused by regexec() being called on mmap()ed data Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=324ecba64eb0436988aca846fb444eafda290d13.1473319844.git.johannes.schindelin@gmx.de \
    --to=johannes.schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).