git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / Atom feed
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
To: "René Scharfe" <l.s.r@web.de>
Cc: Carlo Arenas <carenas@gmail.com>,
	git@vger.kernel.org, avarab@gmail.com, gitster@pobox.com,
	michal.kiedrowicz@gmail.com
Subject: Re: [RFC PATCH v5 0/3] grep: almost no more leaks, hopefully no crashes
Date: Fri, 9 Aug 2019 23:26:23 +0200 (CEST)
Message-ID: <nycvar.QRO.7.76.6.1908092325480.46@tvgsbejvaqbjf.bet> (raw)
In-Reply-To: <d239326e-11c3-5875-13a8-f4123baea6eb@web.de>

[-- Attachment #1: Type: text/plain, Size: 5283 bytes --]

Hi,


On Fri, 9 Aug 2019, René Scharfe wrote:

> Am 09.08.19 um 13:24 schrieb Carlo Arenas:
> > disregard this version, it still broken (and wouldn't even build on
> > some cases), the reasons why are still unclear though but at least it
> > might
> > seem from the last known run in windows that segfaults were prevented
> > at last and something was still off enough to trigger a BUG (shouldn't
> > be a concern with V6 or later that do PCRE2 migration to NED fully, as
> > agreed)
>
> So how about starting stupidly simple?

FWIW I am very much in favor of this approach.

Thanks,
Dscho

> You can test it everywhere, it just needs libpcre2.  It works without
> that library as well (tested with "make USE_LIBPCRE= USE_LIBPCRE2=
> test"), but doesn't allocate anything in that case, of course.  The
> character tables leak fix should be safe on top.  If you detect
> performance issues then we can address them in additional patches.
>
> -- >8 --
> Subject: [PATCH] grep: use xmalloc() for all PCRE2 allocations
>
> Build a PCRE2 global custom context when compiling a pattern and use it
> to tell the library to use xmalloc() for allocations.  This provides
> consistent out-of-memory handling and makes sure it uses a custom
> allocator, e.g. with USE_NED_ALLOCATOR.
>
> Signed-off-by: René Scharfe <l.s.r@web.de>
> ---
> The function names are ridiculously long, I tried to stay within 80
> characters per line but gave up in the end and just kept going without
> line breaks.  Fits the "stupidly simple" approach..
>
>  grep.c | 23 +++++++++++++++++------
>  grep.h |  2 ++
>  2 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/grep.c b/grep.c
> index cd952ef5d3..44f4e38657 100644
> --- a/grep.c
> +++ b/grep.c
> @@ -482,6 +482,16 @@ static void free_pcre1_regexp(struct grep_pat *p)
>  #endif /* !USE_LIBPCRE1 */
>
>  #ifdef USE_LIBPCRE2
> +static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
> +{
> +	return xmalloc(size);
> +}
> +
> +static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
> +{
> +	free(pointer);
> +}
> +
>  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
>  {
>  	int error;
> @@ -495,12 +505,12 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
>
>  	assert(opt->pcre2);
>
> -	p->pcre2_compile_context = NULL;
> +	p->pcre2_general_context = pcre2_general_context_create(pcre2_malloc, pcre2_free, NULL);
> +	p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
>
>  	if (opt->ignore_case) {
>  		if (has_non_ascii(p->pattern)) {
> -			character_tables = pcre2_maketables(NULL);
> -			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
> +			character_tables = pcre2_maketables(p->pcre2_general_context);
>  			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
>  		}
>  		options |= PCRE2_CASELESS;
> @@ -513,7 +523,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
>  					 p->pcre2_compile_context);
>
>  	if (p->pcre2_pattern) {
> -		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
> +		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
>  		if (!p->pcre2_match_data)
>  			die("Couldn't allocate PCRE2 match data");
>  	} else {
> @@ -550,10 +560,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
>  			return;
>  		}
>
> -		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
> +		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, p->pcre2_general_context);
>  		if (!p->pcre2_jit_stack)
>  			die("Couldn't allocate PCRE2 JIT stack");
> -		p->pcre2_match_context = pcre2_match_context_create(NULL);
> +		p->pcre2_match_context = pcre2_match_context_create(p->pcre2_general_context);
>  		if (!p->pcre2_match_context)
>  			die("Couldn't allocate PCRE2 match context");
>  		pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
> @@ -605,6 +615,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
>  	pcre2_match_data_free(p->pcre2_match_data);
>  	pcre2_jit_stack_free(p->pcre2_jit_stack);
>  	pcre2_match_context_free(p->pcre2_match_context);
> +	pcre2_general_context_free(p->pcre2_general_context);
>  }
>  #else /* !USE_LIBPCRE2 */
>  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
> diff --git a/grep.h b/grep.h
> index 1875880f37..73b8b87a3a 100644
> --- a/grep.h
> +++ b/grep.h
> @@ -28,6 +28,7 @@ typedef int pcre_jit_stack;
>  #else
>  typedef int pcre2_code;
>  typedef int pcre2_match_data;
> +typedef int pcre2_general_context;
>  typedef int pcre2_compile_context;
>  typedef int pcre2_match_context;
>  typedef int pcre2_jit_stack;
> @@ -93,6 +94,7 @@ struct grep_pat {
>  	int pcre1_jit_on;
>  	pcre2_code *pcre2_pattern;
>  	pcre2_match_data *pcre2_match_data;
> +	pcre2_general_context *pcre2_general_context;
>  	pcre2_compile_context *pcre2_compile_context;
>  	pcre2_match_context *pcre2_match_context;
>  	pcre2_jit_stack *pcre2_jit_stack;
> --
> 2.22.0
>

  parent reply	other threads:[~2019-08-09 21:26 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-05 11:51 [PATCH 0/1] Fix a problem with PCRE2 and nedmalloc, found via Azure Pipelines Johannes Schindelin via GitGitGadget
2019-08-05 11:51 ` [PATCH 1/1] pcre2: allow overriding the system allocator Johannes Schindelin via GitGitGadget
2019-08-05 16:19   ` Carlo Arenas
2019-08-05 16:27     ` Carlo Arenas
2019-08-05 19:32       ` Johannes Schindelin
2019-08-05 19:26     ` Johannes Schindelin
2019-08-05 21:53     ` Junio C Hamano
2019-08-06  6:24       ` Carlo Arenas
2019-08-06  8:50       ` [PATCH 0/3] grep: no leaks (WIP) Carlo Marcelo Arenas Belón
2019-08-06  8:50         ` [PATCH 1/3] grep: make PCRE1 aware of custom allocator Carlo Marcelo Arenas Belón
2019-08-08 13:54           ` Johannes Schindelin
2019-08-08 15:19             ` Carlo Arenas
2019-08-06  8:50         ` [PATCH 2/3] grep: make PCRE2 " Carlo Marcelo Arenas Belón
2019-08-08 13:56           ` Johannes Schindelin
2019-08-08 14:32             ` Carlo Arenas
2019-08-06  8:50         ` [PATCH 3/3] grep: avoid leak of chartables in PCRE2 Carlo Marcelo Arenas Belón
2019-08-06 16:36         ` [RFC PATCH v3 0/3] grep: no leaks or crashes (windows testing needed) Carlo Marcelo Arenas Belón
2019-08-06 16:36           ` [RFC PATCH v3 1/3] grep: make PCRE1 aware of custom allocator Carlo Marcelo Arenas Belón
2019-08-06 16:36           ` [RFC PATCH v3 2/3] grep: make PCRE2 " Carlo Marcelo Arenas Belón
2019-08-07  5:38             ` René Scharfe
2019-08-07  9:49               ` Carlo Arenas
2019-08-07 13:02                 ` René Scharfe
2019-08-07 13:08                   ` [PATCH 1/2] nedmalloc: do assignments only after the declaration section René Scharfe
2019-08-07 13:09                   ` [PATCH 2/2] nedmalloc: avoid compiler warning about unused value René Scharfe
2019-08-08  2:35                   ` [RFC PATCH v3 2/3] grep: make PCRE2 aware of custom allocator Carlo Arenas
2019-08-08  7:07                     ` René Scharfe
2019-08-08 12:38                       ` Carlo Arenas
2019-08-08 14:29                         ` René Scharfe
2019-08-08 20:18                           ` Johannes Schindelin
2019-08-07 18:15                 ` Junio C Hamano
2019-08-06 16:36           ` [RFC PATCH v3 3/3] grep: avoid leak of chartables in PCRE2 Carlo Marcelo Arenas Belón
2019-08-06 16:48           ` [RFC PATCH v3 0/3] grep: no leaks or crashes (windows testing needed) Junio C Hamano
2019-08-07 21:39           ` [RFC PATCH v4 " Carlo Marcelo Arenas Belón
2019-08-07 21:39             ` [RFC PATCH v4 1/3] grep: make PCRE1 aware of custom allocator Carlo Marcelo Arenas Belón
2019-08-07 21:39             ` [RFC PATCH v4 2/3] grep: make PCRE2 " Carlo Marcelo Arenas Belón
2019-08-07 22:28               ` Junio C Hamano
2019-08-07 21:39             ` [RFC PATCH v4 3/3] grep: avoid leak of chartables in PCRE2 Carlo Marcelo Arenas Belón
2019-08-09  3:02             ` [RFC PATCH v5 0/3] grep: almost no more leaks, hopefully no crashes Carlo Marcelo Arenas Belón
2019-08-09  3:02               ` [RFC PATCH v5 1/3] grep: make PCRE1 aware of custom allocator Carlo Marcelo Arenas Belón
2019-08-09  3:02               ` [RFC PATCH v5 2/3] grep: make PCRE2 " Carlo Marcelo Arenas Belón
2019-08-27  9:07                 ` Johannes Schindelin
2019-08-27 11:51                   ` Carlo Arenas
2019-10-03  5:02                     ` Junio C Hamano
2019-10-03  8:08                       ` Johannes Schindelin
2019-10-03 11:17                         ` Carlo Arenas
2019-10-03 18:23                           ` Johannes Schindelin
2019-10-03 22:57                           ` Junio C Hamano
2019-08-09  3:02               ` [RFC PATCH v5 3/3] grep: avoid leak of chartables in PCRE2 Carlo Marcelo Arenas Belón
2019-08-09 11:24               ` [RFC PATCH v5 0/3] grep: almost no more leaks, hopefully no crashes Carlo Arenas
2019-08-09 17:01                 ` René Scharfe
2019-08-09 17:46                   ` Junio C Hamano
2019-08-09 21:26                   ` Johannes Schindelin [this message]
2019-08-10  3:03                     ` [PATCH] SQUASH Carlo Marcelo Arenas Belón
2019-08-10  7:57                       ` René Scharfe
2019-08-10  8:42                         ` Carlo Arenas
2019-08-10 19:47                           ` René Scharfe
2019-08-12  7:35                             ` Carlo Arenas
2019-08-12 12:14                               ` René Scharfe
2019-08-12 12:28                                 ` Carlo Arenas
2019-08-10 13:57                       ` Johannes Schindelin
2019-08-10  3:05                     ` [RFC PATCH v5 0/3] grep: almost no more leaks, hopefully no crashes Carlo Arenas
2019-08-10  7:56                       ` René Scharfe
2019-08-10 12:40                         ` Carlo Arenas
2019-08-10 21:16                           ` René Scharfe
2019-08-08 20:21           ` [RFC PATCH v3 0/3] grep: no leaks or crashes (windows testing needed) Johannes Schindelin
2019-08-09  6:52             ` Carlo Arenas
2019-08-09 21:13               ` Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=nycvar.QRO.7.76.6.1908092325480.46@tvgsbejvaqbjf.bet \
    --to=johannes.schindelin@gmx.de \
    --cc=avarab@gmail.com \
    --cc=carenas@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=l.s.r@web.de \
    --cc=michal.kiedrowicz@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git