git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Stefan Beller <sbeller@google.com>
To: git@vger.kernel.org
Cc: pclouds@gmail.com, jonathantanmy@google.com, gitster@pobox.com,
	jamill@microsoft.com, Stefan Beller <sbeller@google.com>
Subject: [PATCH v2 01/13] repository: introduce parsed objects field
Date: Mon,  7 May 2018 15:59:04 -0700
Message-ID: <20180507225916.155236-2-sbeller@google.com> (raw)
In-Reply-To: <20180507225916.155236-1-sbeller@google.com>

Convert the existing global cache for parsed objects (obj_hash) into
repository-specific parsed object caches. Existing code that uses
obj_hash are modified to use the parsed object cache of
the_repository; future patches will use the parsed object caches of
other repositories.

Another future use case for a pool of objects is ease of memory management
in revision walking: If we can free the rev-list related memory early in
pack-objects (e.g. part of repack operation) then it could lower memory
pressure significantly when running on large repos. While this has been
discussed on the mailing list lately, this series doesn't implement this.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 object.c     | 63 +++++++++++++++++++++++++++++++++-------------------
 object.h     |  8 +++++++
 repository.c |  7 ++++++
 repository.h | 13 ++++++++++-
 4 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/object.c b/object.c
index 5044d08e96c..f7c624a7ba6 100644
--- a/object.c
+++ b/object.c
@@ -8,17 +8,14 @@
 #include "object-store.h"
 #include "packfile.h"
 
-static struct object **obj_hash;
-static int nr_objs, obj_hash_size;
-
 unsigned int get_max_object_index(void)
 {
-	return obj_hash_size;
+	return the_repository->parsed_objects->obj_hash_size;
 }
 
 struct object *get_indexed_object(unsigned int idx)
 {
-	return obj_hash[idx];
+	return the_repository->parsed_objects->obj_hash[idx];
 }
 
 static const char *object_type_strings[] = {
@@ -90,15 +87,16 @@ struct object *lookup_object(const unsigned char *sha1)
 	unsigned int i, first;
 	struct object *obj;
 
-	if (!obj_hash)
+	if (!the_repository->parsed_objects->obj_hash)
 		return NULL;
 
-	first = i = hash_obj(sha1, obj_hash_size);
-	while ((obj = obj_hash[i]) != NULL) {
+	first = i = hash_obj(sha1,
+			     the_repository->parsed_objects->obj_hash_size);
+	while ((obj = the_repository->parsed_objects->obj_hash[i]) != NULL) {
 		if (!hashcmp(sha1, obj->oid.hash))
 			break;
 		i++;
-		if (i == obj_hash_size)
+		if (i == the_repository->parsed_objects->obj_hash_size)
 			i = 0;
 	}
 	if (obj && i != first) {
@@ -107,7 +105,8 @@ struct object *lookup_object(const unsigned char *sha1)
 		 * that we do not need to walk the hash table the next
 		 * time we look for it.
 		 */
-		SWAP(obj_hash[i], obj_hash[first]);
+		SWAP(the_repository->parsed_objects->obj_hash[i],
+		     the_repository->parsed_objects->obj_hash[first]);
 	}
 	return obj;
 }
@@ -124,19 +123,19 @@ static void grow_object_hash(void)
 	 * Note that this size must always be power-of-2 to match hash_obj
 	 * above.
 	 */
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
-	for (i = 0; i < obj_hash_size; i++) {
-		struct object *obj = obj_hash[i];
+	for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) {
+		struct object *obj = the_repository->parsed_objects->obj_hash[i];
 		if (!obj)
 			continue;
 		insert_obj_hash(obj, new_hash, new_hash_size);
 	}
-	free(obj_hash);
-	obj_hash = new_hash;
-	obj_hash_size = new_hash_size;
+	free(the_repository->parsed_objects->obj_hash);
+	the_repository->parsed_objects->obj_hash = new_hash;
+	the_repository->parsed_objects->obj_hash_size = new_hash_size;
 }
 
 void *create_object(const unsigned char *sha1, void *o)
@@ -147,11 +146,12 @@ void *create_object(const unsigned char *sha1, void *o)
 	obj->flags = 0;
 	hashcpy(obj->oid.hash, sha1);
 
-	if (obj_hash_size - 1 <= nr_objs * 2)
+	if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2)
 		grow_object_hash();
 
-	insert_obj_hash(obj, obj_hash, obj_hash_size);
-	nr_objs++;
+	insert_obj_hash(obj, the_repository->parsed_objects->obj_hash,
+			the_repository->parsed_objects->obj_hash_size);
+	the_repository->parsed_objects->nr_objs++;
 	return obj;
 }
 
@@ -431,8 +431,8 @@ void clear_object_flags(unsigned flags)
 {
 	int i;
 
-	for (i=0; i < obj_hash_size; i++) {
-		struct object *obj = obj_hash[i];
+	for (i=0; i < the_repository->parsed_objects->obj_hash_size; i++) {
+		struct object *obj = the_repository->parsed_objects->obj_hash[i];
 		if (obj)
 			obj->flags &= ~flags;
 	}
@@ -442,13 +442,20 @@ void clear_commit_marks_all(unsigned int flags)
 {
 	int i;
 
-	for (i = 0; i < obj_hash_size; i++) {
-		struct object *obj = obj_hash[i];
+	for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) {
+		struct object *obj = the_repository->parsed_objects->obj_hash[i];
 		if (obj && obj->type == OBJ_COMMIT)
 			obj->flags &= ~flags;
 	}
 }
 
+struct parsed_object_pool *parsed_object_pool_new(void)
+{
+	struct parsed_object_pool *o = xmalloc(sizeof(*o));
+	memset(o, 0, sizeof(*o));
+	return o;
+}
+
 struct raw_object_store *raw_object_store_new(void)
 {
 	struct raw_object_store *o = xmalloc(sizeof(*o));
@@ -488,3 +495,13 @@ void raw_object_store_clear(struct raw_object_store *o)
 	close_all_packs(o);
 	o->packed_git = NULL;
 }
+
+void parsed_object_pool_clear(struct parsed_object_pool *o)
+{
+	/*
+	 * TOOD free objects in o->obj_hash.
+	 *
+	 * As objects are allocated in slabs (see alloc.c), we do
+	 * not need to free each object, but each slab instead.
+	 */
+}
diff --git a/object.h b/object.h
index f13f85b2a94..cecda7da370 100644
--- a/object.h
+++ b/object.h
@@ -1,6 +1,14 @@
 #ifndef OBJECT_H
 #define OBJECT_H
 
+struct parsed_object_pool {
+	struct object **obj_hash;
+	int nr_objs, obj_hash_size;
+};
+
+struct parsed_object_pool *parsed_object_pool_new(void);
+void parsed_object_pool_clear(struct parsed_object_pool *o);
+
 struct object_list {
 	struct object *item;
 	struct object_list *next;
diff --git a/repository.c b/repository.c
index a4848c1bd05..c23404677eb 100644
--- a/repository.c
+++ b/repository.c
@@ -2,6 +2,7 @@
 #include "repository.h"
 #include "object-store.h"
 #include "config.h"
+#include "object.h"
 #include "submodule-config.h"
 
 /* The main repository */
@@ -14,6 +15,8 @@ void initialize_the_repository(void)
 
 	the_repo.index = &the_index;
 	the_repo.objects = raw_object_store_new();
+	the_repo.parsed_objects = parsed_object_pool_new();
+
 	repo_set_hash_algo(&the_repo, GIT_HASH_SHA1);
 }
 
@@ -143,6 +146,7 @@ static int repo_init(struct repository *repo,
 	memset(repo, 0, sizeof(*repo));
 
 	repo->objects = raw_object_store_new();
+	repo->parsed_objects = parsed_object_pool_new();
 
 	if (repo_init_gitdir(repo, gitdir))
 		goto error;
@@ -226,6 +230,9 @@ void repo_clear(struct repository *repo)
 	raw_object_store_clear(repo->objects);
 	FREE_AND_NULL(repo->objects);
 
+	parsed_object_pool_clear(repo->parsed_objects);
+	FREE_AND_NULL(repo->parsed_objects);
+
 	if (repo->config) {
 		git_configset_clear(repo->config);
 		FREE_AND_NULL(repo->config);
diff --git a/repository.h b/repository.h
index e6e00f541bd..73389e81afd 100644
--- a/repository.h
+++ b/repository.h
@@ -22,10 +22,21 @@ struct repository {
 	char *commondir;
 
 	/*
-	 * Holds any information related to accessing the raw object content.
+	 * Holds any information needed to retrieve the raw content
+	 * of objects. The object_parser uses this to get object
+	 * content which it then parses.
 	 */
 	struct raw_object_store *objects;
 
+	/*
+	 * All objects in this repository that have been parsed. This structure
+	 * owns all objects it references, so users of "struct object *"
+	 * generally do not need to free them; instead, when a repository is no
+	 * longer used, call parsed_object_pool_clear() on this structure, which
+	 * is called by the repositories repo_clear on its desconstruction.
+	 */
+	struct parsed_object_pool *parsed_objects;
+
 	/* The store in which the refs are held. */
 	struct ref_store *refs;
 
-- 
2.17.0.255.g8bfb7c0704


  reply index

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-01 21:33 [PATCH 00/13] object store: alloc Stefan Beller
2018-05-01 21:33 ` [PATCH 01/13] repository: introduce object parser field Stefan Beller
2018-05-02 17:17   ` Duy Nguyen
2018-05-02 17:26     ` Stefan Beller
2018-05-02 17:58       ` Duy Nguyen
2018-05-02 20:30   ` Jonathan Tan
2018-05-01 21:33 ` [PATCH 02/13] object: add repository argument to create_object Stefan Beller
2018-05-01 21:43   ` Eric Sunshine
2018-05-01 21:33 ` [PATCH 03/13] object: add repository argument to grow_object_hash Stefan Beller
2018-05-01 21:33 ` [PATCH 04/13] alloc: add repository argument to alloc_blob_node Stefan Beller
2018-05-02 20:34   ` Jonathan Tan
2018-05-01 21:33 ` [PATCH 05/13] alloc: add repository argument to alloc_tree_node Stefan Beller
2018-05-01 21:33 ` [PATCH 06/13] alloc: add repository argument to alloc_commit_node Stefan Beller
2018-05-01 21:33 ` [PATCH 07/13] alloc: add repository argument to alloc_tag_node Stefan Beller
2018-05-01 21:33 ` [PATCH 08/13] alloc: add repository argument to alloc_object_node Stefan Beller
2018-05-01 21:33 ` [PATCH 09/13] alloc: add repository argument to alloc_report Stefan Beller
2018-05-01 21:34 ` [PATCH 10/13] alloc: add repository argument to alloc_commit_index Stefan Beller
2018-05-01 21:34 ` [PATCH 11/13] object: allow grow_object_hash to handle arbitrary repositories Stefan Beller
2018-05-01 21:34 ` [PATCH 12/13] object: allow create_object " Stefan Beller
2018-05-02 20:36   ` Jonathan Tan
2018-05-01 21:34 ` [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions Stefan Beller
2018-05-02 17:44   ` Duy Nguyen
2018-05-03 17:24     ` Stefan Beller
2018-05-03 17:35       ` Duy Nguyen
2018-05-02 20:50   ` Jonathan Tan
2018-05-03 17:25     ` Stefan Beller
2018-05-03 14:58   ` Duy Nguyen
2018-05-02 17:01 ` [PATCH 00/13] object store: alloc Duy Nguyen
2018-05-02 18:07   ` Jameson Miller
2018-05-02 18:22     ` Duy Nguyen
2018-05-02 18:44       ` Jameson Miller
2018-05-03 22:45       ` Stefan Beller
2018-05-07 14:05 ` Junio C Hamano
2018-05-07 20:53   ` Stefan Beller
2018-05-07 22:59 ` [PATCH v2 " Stefan Beller
2018-05-07 22:59   ` Stefan Beller [this message]
2018-05-08 17:23     ` [PATCH v2 01/13] repository: introduce parsed objects field Jonathan Tan
2018-05-07 22:59   ` [PATCH v2 02/13] object: add repository argument to create_object Stefan Beller
2018-05-07 22:59   ` [PATCH v2 03/13] object: add repository argument to grow_object_hash Stefan Beller
2018-05-07 22:59   ` [PATCH v2 04/13] alloc: add repository argument to alloc_blob_node Stefan Beller
2018-05-07 22:59   ` [PATCH v2 05/13] alloc: add repository argument to alloc_tree_node Stefan Beller
2018-05-07 22:59   ` [PATCH v2 06/13] alloc: add repository argument to alloc_commit_node Stefan Beller
2018-05-07 22:59   ` [PATCH v2 07/13] alloc: add repository argument to alloc_tag_node Stefan Beller
2018-05-07 22:59   ` [PATCH v2 08/13] alloc: add repository argument to alloc_object_node Stefan Beller
2018-05-07 22:59   ` [PATCH v2 09/13] alloc: add repository argument to alloc_report Stefan Beller
2018-05-07 22:59   ` [PATCH v2 10/13] alloc: add repository argument to alloc_commit_index Stefan Beller
2018-05-07 22:59   ` [PATCH v2 11/13] object: allow grow_object_hash to handle arbitrary repositories Stefan Beller
2018-05-07 22:59   ` [PATCH v2 12/13] object: allow create_object " Stefan Beller
2018-05-07 22:59   ` [PATCH v2 13/13] alloc: allow arbitrary repositories for alloc functions Stefan Beller
2018-05-08 10:10     ` Jeff King
2018-05-08 15:00     ` Duy Nguyen
2018-05-08 18:38       ` Stefan Beller
2018-05-08 17:45     ` Jonathan Tan
2018-05-08 19:37   ` [PATCH v3 00/13] object store: alloc Stefan Beller
2018-05-08 19:37     ` [PATCH v3 01/13] repository: introduce parsed objects field Stefan Beller
2018-05-08 19:37     ` [PATCH v3 02/13] object: add repository argument to create_object Stefan Beller
2018-05-08 19:37     ` [PATCH v3 03/13] object: add repository argument to grow_object_hash Stefan Beller
2018-05-08 19:37     ` [PATCH v3 04/13] alloc: add repository argument to alloc_blob_node Stefan Beller
2018-05-08 19:37     ` [PATCH v3 05/13] alloc: add repository argument to alloc_tree_node Stefan Beller
2018-05-08 19:37     ` [PATCH v3 06/13] alloc: add repository argument to alloc_commit_node Stefan Beller
2018-05-08 19:37     ` [PATCH v3 07/13] alloc: add repository argument to alloc_tag_node Stefan Beller
2018-05-08 19:37     ` [PATCH v3 08/13] alloc: add repository argument to alloc_object_node Stefan Beller
2018-05-08 19:37     ` [PATCH v3 09/13] alloc: add repository argument to alloc_report Stefan Beller
2018-05-08 19:37     ` [PATCH v3 10/13] alloc: add repository argument to alloc_commit_index Stefan Beller
2018-05-08 19:37     ` [PATCH v3 11/13] object: allow grow_object_hash to handle arbitrary repositories Stefan Beller
2018-05-08 19:37     ` [PATCH v3 12/13] object: allow create_object " Stefan Beller
2018-05-08 19:37     ` [PATCH v3 13/13] alloc: allow arbitrary repositories for alloc functions Stefan Beller
2018-05-08 20:04       ` Jonathan Tan
2018-05-08 20:37         ` Stefan Beller
2018-05-09 15:54           ` Duy Nguyen
2018-05-09 17:18         ` Duy Nguyen
2018-05-09 19:20           ` Stefan Beller
2018-05-10 15:43             ` Duy Nguyen
2018-05-10  0:40     ` [PATCH v4 00/13] object store: alloc Stefan Beller
2018-05-10  0:40       ` [PATCH v4 01/13] repository: introduce parsed objects field Stefan Beller
2018-05-10  0:40       ` [PATCH v4 02/13] object: add repository argument to create_object Stefan Beller
2018-05-10  0:40       ` [PATCH v4 03/13] object: add repository argument to grow_object_hash Stefan Beller
2018-05-10  0:40       ` [PATCH v4 04/13] alloc: add repository argument to alloc_blob_node Stefan Beller
2018-05-10  0:40       ` [PATCH v4 05/13] alloc: add repository argument to alloc_tree_node Stefan Beller
2018-05-10  0:40       ` [PATCH v4 06/13] alloc: add repository argument to alloc_commit_node Stefan Beller
2018-05-10  0:40       ` [PATCH v4 07/13] alloc: add repository argument to alloc_tag_node Stefan Beller
2018-05-10  0:40       ` [PATCH v4 08/13] alloc: add repository argument to alloc_object_node Stefan Beller
2018-05-10  0:40       ` [PATCH v4 09/13] alloc: add repository argument to alloc_report Stefan Beller
2018-05-10  0:40       ` [PATCH v4 10/13] alloc: add repository argument to alloc_commit_index Stefan Beller
2018-05-10  0:40       ` [PATCH v4 11/13] object: allow grow_object_hash to handle arbitrary repositories Stefan Beller
2018-05-10  0:40       ` [PATCH v4 12/13] object: allow create_object " Stefan Beller
2018-05-10  0:40       ` [PATCH v4 13/13] alloc: allow arbitrary repositories for alloc functions Stefan Beller
2018-05-10 17:16       ` [PATCH v4 00/13] object store: alloc Jonathan Tan
2018-05-10 17:32         ` Stefan Beller
2018-05-10 20:56           ` Jonathan Tan
2018-05-10 22:36             ` Stefan Beller
2018-05-11 19:17         ` [PATCH] alloc: allow arbitrary repositories for alloc functions Stefan Beller
2018-05-11 19:38           ` Eric Sunshine
2018-05-15 21:48             ` Stefan Beller
2018-05-16  2:27               ` Junio C Hamano

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180507225916.155236-2-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jamill@microsoft.com \
    --cc=jonathantanmy@google.com \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox