git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Nicolas Pitre <nico@cam.org>
Cc: Nix <nix@esperi.org.uk>, Steven Grimm <koreth@midwinter.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Git Mailing List <git@vger.kernel.org>
Subject: Re: People unaware of the importance of "git gc"?
Date: Wed, 05 Sep 2007 13:01:37 -0700	[thread overview]
Message-ID: <7vr6lcj2zi.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <alpine.LFD.0.9999.0709051438460.21186@xanadu.home> (Nicolas Pitre's message of "Wed, 05 Sep 2007 14:54:40 -0400 (EDT)")

Nicolas Pitre <nico@cam.org> writes:

> Not only that.  Currently the "Counting objects" phase when running 
> git-gc on the Linux repo takes a significant amount of time, even if 
> there is little to repack.
>
> If any kind of automatic repack is implemented, it should be an 
> incremental repacking only, not the full thing, i.e. git-repack without 
> -a, or git-pack-objects with --unpacked.  The idea is to be the least 
> intrusive as possible.  Also, object walking should be limited to 
> objects linked to a commit object which is itself unpacked in order to 
> cut on the time required to fully enumerate all objects.
>
> This way a semi-packed state will always be preserved and should be good 
> enough.  The full repacking should probably be left to manual execution 
> of git-gc.

Ok, how about doing something like this?

-- >8 -- snipsnap -- >8 -- clipcrap -- >8 --
Implement git gc --auto

This implements a new option "git gc --auto".  When gc.auto is
set to a positive value, and the object database has accumulated
roughly that many number of loose objects, this runs a
lightweight version of "git gc".  The primary difference from
the full "git gc" is that it does not pass "-a" option to "git
repack", which means we do not try to repack _everything_, but
only repack incrementally.  We still do "git prune-packed".  The
default threshold is arbitrarily set by yours truly to:

 - not trigger it for fully unpacked git v0.99 history;

 - do trigger it for fully unpacked git v1.0.0 history;

 - not trigger it for incremental update to git v1.0.0 starting
   from fully packed git v0.99 history.

This patch does not add invocation of the "auto repacking".  It
is left to key Porcelain commands that could produce tons of
loose objects to add a call to "git gc --auto" after they are
done their work.  Obvious candidates are:

	git add
	git fetch
        git merge
        git rebase        

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 builtin-gc.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/builtin-gc.c b/builtin-gc.c
index 9397482..093b3dd 100644
--- a/builtin-gc.c
+++ b/builtin-gc.c
@@ -20,6 +20,7 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]";
 
 static int pack_refs = 1;
 static int aggressive_window = -1;
+static int gc_auto_threshold = 6700;
 
 #define MAX_ADD 10
 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL};
@@ -28,6 +29,8 @@ static const char *argv_repack[MAX_ADD] = {"repack", "-a", "-d", "-l", NULL};
 static const char *argv_prune[] = {"prune", NULL};
 static const char *argv_rerere[] = {"rerere", "gc", NULL};
 
+static const char *argv_repack_auto[] = {"repack", "-d", "-l", NULL};
+
 static int gc_config(const char *var, const char *value)
 {
 	if (!strcmp(var, "gc.packrefs")) {
@@ -41,6 +44,10 @@ static int gc_config(const char *var, const char *value)
 		aggressive_window = git_config_int(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "gc.auto")) {
+		gc_auto_threshold = git_config_int(var, value);
+		return 0;
+	}
 	return git_default_config(var, value);
 }
 
@@ -57,10 +64,49 @@ static void append_option(const char **cmd, const char *opt, int max_length)
 	cmd[i] = NULL;
 }
 
+static int need_to_gc(void)
+{
+	/*
+	 * Quickly check if a "gc" is needed, by estimating how
+	 * many loose objects there are.  Because SHA-1 is evenly
+	 * distributed, we can check only one and get a reasonable
+	 * estimate.
+	 */
+	char path[PATH_MAX];
+	const char *objdir = get_object_directory();
+	DIR *dir;
+	struct dirent *ent;
+	int auto_threshold;
+	int num_loose = 0;
+	int needed = 0;
+
+	if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) {
+		warning("insanely long object directory %.*s", 50, objdir);
+		return 0;
+	}
+	dir = opendir(path);
+	if (!dir)
+		return 0;
+
+	auto_threshold = (gc_auto_threshold + 255) / 256;
+	while ((ent = readdir(dir)) != NULL) {
+		if (strspn(ent->d_name, "0123456789abcdef") != 38 ||
+		    ent->d_name[38] != '\0')
+			continue;
+		if (++num_loose > auto_threshold) {
+			needed = 1;
+			break;
+		}
+	}
+	closedir(dir);
+	return needed;
+}
+
 int cmd_gc(int argc, const char **argv, const char *prefix)
 {
 	int i;
 	int prune = 0;
+	int auto_gc = 0;
 	char buf[80];
 
 	git_config(gc_config);
@@ -82,12 +128,28 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 			}
 			continue;
 		}
-		/* perhaps other parameters later... */
+		if (!strcmp(arg, "--auto")) {
+			if (gc_auto_threshold <= 0)
+				return 0;
+			auto_gc = 1;
+			continue;
+		}
 		break;
 	}
 	if (i != argc)
 		usage(builtin_gc_usage);
 
+	if (auto_gc) {
+		/*
+		 * Auto-gc should be least intrusive as possible.
+		 */
+		prune = 0;
+		for (i = 0; i < ARRAY_SIZE(argv_repack_auto); i++)
+			argv_repack[i] = argv_repack_auto[i];
+		if (!need_to_gc())
+			return 0;
+	}
+
 	if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD))
 		return error(FAILED_RUN, argv_pack_refs[0]);
 

  reply	other threads:[~2007-09-05 20:01 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-05  7:09 People unaware of the importance of "git gc"? Linus Torvalds
2007-09-05  7:21 ` Martin Langhoff
2007-09-05  7:37   ` Karl Hasselström
2007-09-05  7:30 ` Junio C Hamano
2007-09-05  7:26   ` Tomash Brechko
2007-09-05  8:13   ` Johan Herland
2007-09-05  8:39     ` Matthieu Moy
2007-09-05  8:41       ` Johan Herland
2007-09-05  8:47         ` David Kastrup
2007-09-05  8:51       ` Pierre Habouzit
2007-09-05  9:02         ` David Kastrup
2007-09-05  9:04         ` Matthieu Moy
2007-09-05  8:51   ` Wincent Colaiuta
2007-09-05  7:42 ` Pierre Habouzit
2007-09-05  8:16   ` Junio C Hamano
2007-09-05  8:50   ` Steven Grimm
     [not found]     ` <86ps0xcwxo.fsf@lola.quinscape.zz>
2007-09-05  9:07       ` Steven Grimm
2007-09-05  9:13         ` David Kastrup
2007-09-05  9:07     ` Junio C Hamano
2007-09-05  9:27       ` Martin Langhoff
2007-09-05  9:33         ` Matthieu Moy
2007-09-05 14:17           ` Johan De Messemaeker
2007-09-05 17:31             ` Matthieu Moy
2007-09-05 23:56               ` Jeff King
2007-09-05  9:13     ` David Kastrup
2007-09-05  9:14     ` Pierre Habouzit
2007-09-05 17:51   ` Nix
2007-09-05 18:14     ` Steven Grimm
2007-09-05 18:22       ` Nix
2007-09-05 18:54         ` Nicolas Pitre
2007-09-05 20:01           ` Junio C Hamano [this message]
2007-09-05 20:35             ` Nicolas Pitre
2007-09-05 21:14               ` Nix
2007-09-05 21:46               ` Junio C Hamano
2007-09-05 23:04                 ` Nicolas Pitre
2007-09-05 23:42                   ` Junio C Hamano
2007-09-06  0:27                     ` Carlos Rica
2007-09-06  5:55                 ` David Kastrup
2007-09-05 21:49               ` Junio C Hamano
2007-09-05 21:59                 ` Invoke "git gc --auto" from commit, merge, am and rebase Junio C Hamano
2007-09-06  2:39                   ` Shawn O. Pearce
2007-09-05 20:37             ` [PATCH] Invoke "git gc --auto" from "git add" and "git fetch" Junio C Hamano
     [not found]               ` <69b0c0350709051357ifa547aarfe3e0b36cf9be98f@mail.gmail.com>
2007-09-05 20:59                 ` Fwd: " Govind Salinas
2007-09-06 12:02               ` Johannes Schindelin
2007-09-05 21:18             ` People unaware of the importance of "git gc"? Alex Riesen
2007-09-06  2:44             ` Russ Dill
2007-09-06  2:52               ` Shawn O. Pearce
2007-09-06  9:28               ` Andreas Ericsson
2007-09-06  2:45             ` Shawn O. Pearce
2007-09-06  2:49               ` Steven Grimm
2007-09-06  2:56                 ` Shawn O. Pearce
2007-09-06 15:54             ` Johannes Schindelin
2007-09-06 17:49               ` Junio C Hamano
2007-09-06 18:15                 ` Linus Torvalds
2007-09-06 18:29                   ` Steven Grimm
2007-09-06 23:12                   ` Subject: [PATCH] git-merge-pack Junio C Hamano
2007-09-06 23:35                     ` Linus Torvalds
2007-09-07  0:51                     ` Nicolas Pitre
2007-09-07  1:58                       ` Junio C Hamano
2007-09-07  2:32                         ` Nicolas Pitre
2007-09-07  4:07                       ` Shawn O. Pearce
2007-09-07  4:43                       ` Junio C Hamano
2007-09-08  9:50                         ` [PATCH] make sha1_file.c::matches_pack_name() available to others Junio C Hamano
2007-09-08 10:01                         ` [PATCH] pack-objects --repack-unpacked Junio C Hamano
2007-09-07  7:11                     ` Subject: [PATCH] git-merge-pack Johannes Sixt
2007-09-07  7:34                       ` Junio C Hamano
2007-09-07  7:24                     ` Andy Parkins
2007-09-07  4:48                 ` People unaware of the importance of "git gc"? Shawn O. Pearce
2007-09-07 10:12                 ` Johannes Schindelin
2018-10-07 18:28           ` What's so special about objects/17/ ? Ævar Arnfjörð Bjarmason
2018-10-07 18:35             ` Johannes Sixt
2018-10-07 19:06               ` Ævar Arnfjörð Bjarmason
2018-10-07 22:39                 ` Johannes Sixt
2018-10-08  0:54                   ` Junio C Hamano
2018-10-07 19:46             ` Junio C Hamano
2018-10-07 20:07               ` Junio C Hamano
2018-10-08 19:17                 ` Stefan Beller
2018-10-09  1:03                   ` Junio C Hamano
2018-10-09 17:37                     ` Stefan Beller
2018-10-10  1:10                       ` Junio C Hamano
2018-10-10 19:08                         ` Stefan Beller
2018-10-08 10:36               ` Ævar Arnfjörð Bjarmason
2018-10-09  1:07                 ` Junio C Hamano
2018-10-09 17:40                   ` Stefan Beller
2007-09-05  8:16 ` People unaware of the importance of "git gc"? David Kastrup
2007-09-05 16:47 ` Govind Salinas
2007-09-05 17:19   ` Carl Worth
2007-09-05 17:55     ` Jing Xue
2007-09-05 17:35   ` Steven Grimm
2007-09-05 18:28     ` Nix
2007-09-05 17:44 ` J. Bruce Fields
2007-09-05 18:46   ` Brandon Casey
2007-09-05 19:09     ` David Kastrup
2007-09-05 19:13       ` J. Bruce Fields
2007-09-05 19:43         ` David Kastrup
2007-09-05 19:20       ` Mike Hommey
2007-09-05 21:07 ` Alex Riesen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vr6lcj2zi.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=koreth@midwinter.com \
    --cc=nico@cam.org \
    --cc=nix@esperi.org.uk \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).