git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Matheus Tavares <matheus.bernardino@usp.br>,
	Elijah Newren <newren@gmail.com>,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH 2/3] mem-pool: use more standard initialization and finalization
Date: Fri, 14 Aug 2020 03:02:14 +0000	[thread overview]
Message-ID: <f13a52055cd975d457e0593cbabb70897e78024b.1597374135.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.830.git.git.1597374135.gitgitgadget@gmail.com>

From: Elijah Newren <newren@gmail.com>

A typical memory type, such as strbuf, hashmap, or string_list can be
stored on the stack or embedded within another structure.  mem_pool
cannot be, because of how mem_pool_init() and mem_pool_discard() are
written.  mem_pool_init() does essentially the following (simplified
for purposes of explanation here):

    void mem_pool_init(struct mem_pool **pool...)
    {
        *pool = xcalloc(1, sizeof(**pool));

It seems weird to require that mem_pools can only be accessed through a
pointer.  It also seems slightly dangerous: unlike strbuf_release() or
strbuf_reset() or string_list_clear(), all of which put the data
structure into a state where it can be re-used after the call,
mem_pool_discard(pool) will leave pool pointing at free'd memory.
read-cache (and split-index) are the only current users of mem_pools,
and they haven't fallen into a use-after-free mistake here, but it seems
likely to be problematic for future users especially since several of
the current callers of mem_pool_init() will only call it when the
mem_pool* is not already allocated (i.e. is NULL).

This type of mechanism also prevents finding synchronization
points where one can free existing memory and then resume more
operations.  It would be natural at such points to run something like
    mem_pool_discard(pool...);
and, if necessary,
    mem_pool_init(&pool...);
and then carry on using the pool.  However, this fails badly
if several objects hold a copy of the value of pool from before these
calls; in such a case, those objects won't see the updated value
that mem_pool_init() writes into pool, and they'll all instead
be reading and writing free'd memory.

Modify mem_pool_init()/mem_pool_discard() to behave more like
   strbuf_init()/strbuf_release()
or
   string_list_init()/string_list_clear()
In particular: (1) make mem_pool_init() just take a mem_pool* and have
it only worry about allocating struct mp_blocks, not the struct mem_pool
itself, (2) make mem_pool_discard() free the memory that the pool was
responsible for, but leave it in a state where it can be used to
allocate more memory afterward (without the need to call mem_pool_init()
again).

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 mem-pool.c    | 20 +++++++-------------
 mem-pool.h    |  4 ++--
 read-cache.c  | 21 +++++++++++++--------
 split-index.c |  6 ++++--
 4 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/mem-pool.c b/mem-pool.c
index 3a8c54d9df..b7d789823e 100644
--- a/mem-pool.c
+++ b/mem-pool.c
@@ -33,21 +33,14 @@ static struct mp_block *mem_pool_alloc_block(struct mem_pool *mem_pool, size_t b
 	return p;
 }
 
-void mem_pool_init(struct mem_pool **mem_pool, size_t initial_size)
+void mem_pool_init(struct mem_pool *mem_pool, size_t initial_size)
 {
-	struct mem_pool *pool;
-
-	if (*mem_pool)
-		return;
-
-	pool = xcalloc(1, sizeof(*pool));
-
-	pool->block_alloc = BLOCK_GROWTH_SIZE;
+	mem_pool->mp_block = NULL;
+	mem_pool->pool_alloc = 0;
+	mem_pool->block_alloc = BLOCK_GROWTH_SIZE;
 
 	if (initial_size > 0)
-		mem_pool_alloc_block(pool, initial_size, NULL);
-
-	*mem_pool = pool;
+		mem_pool_alloc_block(mem_pool, initial_size, NULL);
 }
 
 void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory)
@@ -66,7 +59,8 @@ void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory)
 		free(block_to_free);
 	}
 
-	free(mem_pool);
+	mem_pool->mp_block = NULL;
+	mem_pool->pool_alloc = 0;
 }
 
 void *mem_pool_alloc(struct mem_pool *mem_pool, size_t len)
diff --git a/mem-pool.h b/mem-pool.h
index fcaa2d462b..30b7a8c03b 100644
--- a/mem-pool.h
+++ b/mem-pool.h
@@ -24,10 +24,10 @@ struct mem_pool {
 /*
  * Initialize mem_pool with specified initial size.
  */
-void mem_pool_init(struct mem_pool **mem_pool, size_t initial_size);
+void mem_pool_init(struct mem_pool *mem_pool, size_t initial_size);
 
 /*
- * Discard a memory pool and free all the memory it is responsible for.
+ * Discard all the memory the memory pool is responsible for.
  */
 void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory);
 
diff --git a/read-cache.c b/read-cache.c
index 8ed1c29b54..fa291cdbee 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -89,8 +89,10 @@ static struct mem_pool *find_mem_pool(struct index_state *istate)
 	else
 		pool_ptr = &istate->ce_mem_pool;
 
-	if (!*pool_ptr)
-		mem_pool_init(pool_ptr, 0);
+	if (!*pool_ptr) {
+		*pool_ptr = xmalloc(sizeof(**pool_ptr));
+		mem_pool_init(*pool_ptr, 0);
+	}
 
 	return *pool_ptr;
 }
@@ -2006,11 +2008,12 @@ static unsigned long load_all_cache_entries(struct index_state *istate,
 {
 	unsigned long consumed;
 
+	istate->ce_mem_pool = xmalloc(sizeof(*istate->ce_mem_pool));
 	if (istate->version == 4) {
-		mem_pool_init(&istate->ce_mem_pool,
+		mem_pool_init(istate->ce_mem_pool,
 				estimate_cache_size_from_compressed(istate->cache_nr));
 	} else {
-		mem_pool_init(&istate->ce_mem_pool,
+		mem_pool_init(istate->ce_mem_pool,
 				estimate_cache_size(mmap_size, istate->cache_nr));
 	}
 
@@ -2070,7 +2073,8 @@ static unsigned long load_cache_entries_threaded(struct index_state *istate, con
 	if (istate->name_hash_initialized)
 		BUG("the name hash isn't thread safe");
 
-	mem_pool_init(&istate->ce_mem_pool, 0);
+	istate->ce_mem_pool = xmalloc(sizeof(*istate->ce_mem_pool));
+	mem_pool_init(istate->ce_mem_pool, 0);
 
 	/* ensure we have no more threads than we have blocks to process */
 	if (nr_threads > ieot->nr)
@@ -2097,11 +2101,12 @@ static unsigned long load_cache_entries_threaded(struct index_state *istate, con
 		nr = 0;
 		for (j = p->ieot_start; j < p->ieot_start + p->ieot_blocks; j++)
 			nr += p->ieot->entries[j].nr;
+		p->ce_mem_pool = xmalloc(sizeof(*p->ce_mem_pool));
 		if (istate->version == 4) {
-			mem_pool_init(&p->ce_mem_pool,
+			mem_pool_init(p->ce_mem_pool,
 				estimate_cache_size_from_compressed(nr));
 		} else {
-			mem_pool_init(&p->ce_mem_pool,
+			mem_pool_init(p->ce_mem_pool,
 				estimate_cache_size(mmap_size, nr));
 		}
 
@@ -2358,7 +2363,7 @@ int discard_index(struct index_state *istate)
 
 	if (istate->ce_mem_pool) {
 		mem_pool_discard(istate->ce_mem_pool, should_validate_cache_entries());
-		istate->ce_mem_pool = NULL;
+		FREE_AND_NULL(istate->ce_mem_pool);
 	}
 
 	return 0;
diff --git a/split-index.c b/split-index.c
index e6154e4ea9..c0e8ad670d 100644
--- a/split-index.c
+++ b/split-index.c
@@ -79,8 +79,10 @@ void move_cache_to_base_index(struct index_state *istate)
 	if (si->base &&
 		si->base->ce_mem_pool) {
 
-		if (!istate->ce_mem_pool)
-			mem_pool_init(&istate->ce_mem_pool, 0);
+		if (!istate->ce_mem_pool) {
+			istate->ce_mem_pool = xmalloc(sizeof(struct mem_pool));
+			mem_pool_init(istate->ce_mem_pool, 0);
+		}
 
 		mem_pool_combine(istate->ce_mem_pool, istate->split_index->base->ce_mem_pool);
 	}
-- 
gitgitgadget


  parent reply	other threads:[~2020-08-14  3:02 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-14  3:02 [PATCH 0/3] Extend and add a little more generalization to the mem_pool API Elijah Newren via GitGitGadget
2020-08-14  3:02 ` [PATCH 1/3] mem-pool: add convenience functions for xstrdup and xstrndup Elijah Newren via GitGitGadget
2020-08-14  4:42   ` Eric Sunshine
2020-08-14  3:02 ` Elijah Newren via GitGitGadget [this message]
2020-08-14  4:38   ` [PATCH 2/3] mem-pool: use more standard initialization and finalization Junio C Hamano
2020-08-14  3:02 ` [PATCH 3/3] mem-pool: use consistent pool variable name Elijah Newren via GitGitGadget
2020-08-14  3:51 ` [PATCH 0/3] Extend and add a little more generalization to the mem_pool API Matheus Tavares Bernardino
2020-08-14  6:00 ` [PATCH v2 " Elijah Newren via GitGitGadget
2020-08-14  6:00   ` [PATCH v2 1/3] mem-pool: add convenience functions for xstrdup and xstrndup Elijah Newren via GitGitGadget
2020-08-14  8:21     ` René Scharfe
2020-08-14  6:00   ` [PATCH v2 2/3] mem-pool: use more standard initialization and finalization Elijah Newren via GitGitGadget
2020-08-14  6:00   ` [PATCH v2 3/3] mem-pool: use consistent pool variable name Elijah Newren via GitGitGadget
2020-08-15 17:37   ` [PATCH v3 0/3] Extend and add a little more generalization to the mem_pool API Elijah Newren via GitGitGadget
2020-08-15 17:37     ` [PATCH v3 1/3] mem-pool: add convenience functions for strdup and strndup Elijah Newren via GitGitGadget
2020-08-15 17:37     ` [PATCH v3 2/3] mem-pool: use more standard initialization and finalization Elijah Newren via GitGitGadget
2020-08-15 17:37     ` [PATCH v3 3/3] mem-pool: use consistent pool variable name Elijah Newren via GitGitGadget
2020-08-18 19:27     ` [PATCH v3 0/3] Extend and add a little more generalization to the mem_pool API Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f13a52055cd975d457e0593cbabb70897e78024b.1597374135.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=matheus.bernardino@usp.br \
    --cc=newren@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).