* [PATCH] fast-import: implement --min-pack-size parameter
From: Eric Wong @ 2016-04-23  2:42 UTC (permalink / raw)
  To: git; +Cc: Jonathan Nieder, Jeff King, Junio C Hamano, Shawn Pearce

With many incremental imports, small packs become highly
inefficient due to the need to readdir scan and load many
indices to locate even a single object.  Frequent repacking and
consolidation may be prohibitively expensive in terms of disk
I/O, especially in large repositories where the initial packs
were aggressively optimized and marked with .keep files.

In those cases, users may be better served with loose objects
and relying on "git gc --auto".

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
  There should be a matching config file directive, but I'm
  not sure how/if it should affect other commands.  So I'm
  not sure if it should be "pack.packSizeMin" or
  "fastimport.packSizeMin" or something else.

  To further reduce disk I/O, the fsync_or_die call in
  fixup_pack_header_footer could probably be moved out of that
  function and become the caller's responsibility.
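
  As a rough usage sketch of the option this patch adds (the stream
  name and the 1m threshold below are placeholders, not part of the
  patch):

	# explode any resulting pack smaller than 1 MiB into loose
	# objects and let "git gc --auto" consolidate them later;
	# larger packs are kept as before
	git fast-import --min-pack-size=1m --done <frontend-stream

	# without the option, each small incremental import leaves
	# another tiny pack (plus .idx) under .git/objects/pack/
	git fast-import --done <frontend-stream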

 Documentation/git-fast-import.txt   |  9 ++++++++
 fast-import.c                       | 30 ++++++++++++++++++++++++++
 t/t9302-fast-import-min-packsize.sh | 42 +++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100755 t/t9302-fast-import-min-packsize.sh

diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 66910aa..8c0ac94 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -136,6 +136,15 @@ Performance and Compression Tuning
 	Maximum size of each output packfile.
 	The default is unlimited.
 
+--min-pack-size=<n>::
+	Minimum size of an output packfile; packfiles smaller
+	than this threshold are unpacked into loose objects and
+	the pack is discarded.  This is useful when performing
+	small, incremental imports, where relying on loose objects
+	and `git gc --auto` may be more efficient than generating
+	many tiny packs.
+	The default is to always preserve the pack and never
+	generate loose objects.
 
 Performance
 -----------
diff --git a/fast-import.c b/fast-import.c
index 9fc7093..a00bee5 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -166,6 +166,7 @@ Format of STDIN stream:
 #include "quote.h"
 #include "exec_cmd.h"
 #include "dir.h"
+#include "run-command.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -282,6 +283,7 @@ struct recent_command {
 /* Configured limits on output */
 static unsigned long max_depth = 10;
 static off_t max_packsize;
+static off_t min_packsize;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
@@ -950,6 +952,22 @@ static void unkeep_all_packs(void)
 	}
 }
 
+static int loosen_small_pack(const struct packed_git *p)
+{
+	struct child_process unpack = CHILD_PROCESS_INIT;
+
+	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+		die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+	unpack.in = p->pack_fd;
+	unpack.git_cmd = 1;
+	unpack.stdout_to_stderr = 1;
+	argv_array_push(&unpack.args, "unpack-objects");
+	argv_array_push(&unpack.args, "-q");
+
+	return run_command(&unpack);
+}
+
 static void end_packfile(void)
 {
 	static int running;
@@ -972,6 +990,12 @@ static void end_packfile(void)
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
 				    pack_data->pack_name, object_count,
 				    cur_pack_sha1, pack_size);
+
+		if (pack_size < min_packsize) {
+			if (loosen_small_pack(pack_data) == 0)
+				goto discard_pack;
+		}
+
 		close(pack_data->pack_fd);
 		idx_name = keep_pack(create_index());
 
@@ -1002,6 +1026,7 @@ static void end_packfile(void)
 		pack_id++;
 	}
 	else {
+discard_pack:
 		close(pack_data->pack_fd);
 		unlink_or_warn(pack_data->pack_name);
 	}
@@ -3237,6 +3262,11 @@ static int parse_one_option(const char *option)
 			v = 1024 * 1024;
 		}
 		max_packsize = v;
+	} else if (skip_prefix(option, "min-pack-size=", &option)) {
+		unsigned long v;
+		if (!git_parse_ulong(option, &v))
+			return 0;
+		min_packsize = v;
 	} else if (skip_prefix(option, "big-file-threshold=", &option)) {
 		unsigned long v;
 		if (!git_parse_ulong(option, &v))
diff --git a/t/t9302-fast-import-min-packsize.sh b/t/t9302-fast-import-min-packsize.sh
new file mode 100755
index 0000000..7dcdccc
--- /dev/null
+++ b/t/t9302-fast-import-min-packsize.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+test_description='test git fast-import min-packsize'
+. ./test-lib.sh
+
+test_expect_success 'create loose objects on import' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	initial
+	COMMIT
+
+	done
+	INPUT_END
+
+	git fast-import --done --min-pack-size=1g <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 0
+'
+
+test_expect_success 'bigger packs are preserved' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	incremental should create a pack
+	COMMIT
+	from refs/heads/master^0
+
+	done
+	INPUT_END
+
+	git fast-import --done --min-pack-size=10 <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_done
-- 
EW


* Re: [PATCH] fast-import: implement --min-pack-size parameter
From: Jeff King @ 2016-04-23  3:13 UTC (permalink / raw)
  To: Eric Wong; +Cc: git, Jonathan Nieder, Junio C Hamano, Shawn Pearce

On Sat, Apr 23, 2016 at 02:42:25AM +0000, Eric Wong wrote:

> With many incremental imports, small packs become highly
> inefficient due to the need to readdir scan and load many
> indices to locate even a single object.  Frequent repacking and
> consolidation may be prohibitively expensive in terms of disk
> I/O, especially in large repositories where the initial packs
> were aggressively optimized and marked with .keep files.
> 
> In those cases, users may be better served with loose objects
> and relying on "git gc --auto".
> 
> Signed-off-by: Eric Wong <normalperson@yhbt.net>
> ---
>   There should be a matching config file directive, but I'm
>   not sure how/if it should affect other commands.  So I'm
>   not sure if it should be "pack.packSizeMin" or
>   "fastimport.packSizeMin" or something else.

This same concept exists for pushing/fetching, but there we measure it
not in bytes but by the number of objects, which is probably a better
measure. A single 10MB blob is better as a loose object than as a pack,
but a thousand 10KB blobs should be a pack.

There we have fetch.unpackLimit and receive.unpackLimit for the two
operations, plus transfer.unpackLimit to control both of them. This
doesn't necessarily need to be tied to that config, but you could
certainly consider it in the same boat. It's a way of transferring a
load of objects into the repository.

So it would make some sense to me to have fastimport.unpackLimit,
falling back to transfer.unpackLimit.
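
Concretely, the existing knobs and the proposed one would look
something like this (the last line is only the proposal above, not an
existing variable):

	# existing knobs, all measured in number of objects (default 100)
	git config fetch.unpackLimit 100      # fetches
	git config receive.unpackLimit 100    # pushes (receive-pack)
	git config transfer.unpackLimit 100   # fallback for both
	# proposed here, falling back to transfer.unpackLimit
	git config fastimport.unpackLimit 100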

-Peff


* [PATCH v2] fast-import: implement unpack limit
From: Eric Wong @ 2016-04-24  4:32 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Jonathan Nieder, Junio C Hamano, Shawn Pearce

Jeff King <peff@peff.net> wrote:
> There we have fetch.unpackLimit and receive.unpackLimit for the two
> operations, plus transfer.unpackLimit to control both of them. This
> doesn't necessarily need to be tied to that config, but you could
> certainly consider it in the same boat. It's a way of transferring a
> load of objects into the repository.

Ah, thanks, I've always overlooked those :x  But it makes sense
to me this way.

> So it would make some sense to me to have fastimport.unpackLimit,
> falling back to transfer.unpackLimit.

Done below, purely as a config option with no CLI switch.
I'm hoping the change in default behavior is acceptable,
since it matches the behavior of the other "transfer" mechanisms.
I needed to adjust the t9300 test, though.
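
As a sketch of what that default means in practice ("input" below is a
placeholder stream ending with "done", as in the tests):

	# unpack_limit defaults to 100, so a tiny import now ends up
	# as loose objects instead of a new pack
	git fast-import --done <input
	find .git/objects/?? -type f | wc -l    # a few loose objects
	find .git/objects/pack -type f | wc -l  # 0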

-----------------------------8<-----------------------------
Subject: [PATCH] fast-import: implement unpack limit

With many incremental imports, small packs become highly
inefficient due to the need to readdir scan and load many
indices to locate even a single object.  Frequent repacking and
consolidation may be prohibitively expensive in terms of disk
I/O, especially in large repositories where the initial packs
were aggressively optimized and marked with .keep files.

In those cases, users may be better served with loose objects
and relying on "git gc --auto".

This changes the default behavior of fast-import for small
imports found in test cases, so adjustments to t9300 were
necessary.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 Documentation/config.txt            |  9 +++++++
 Documentation/git-fast-import.txt   |  2 ++
 fast-import.c                       | 31 ++++++++++++++++++++++++
 t/t9300-fast-import.sh              |  2 ++
 t/t9302-fast-import-unpack-limit.sh | 48 +++++++++++++++++++++++++++++++++++++
 5 files changed, 92 insertions(+)
 create mode 100755 t/t9302-fast-import-unpack-limit.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 42d2b50..3d8bc97 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1154,6 +1154,15 @@ difftool.<tool>.cmd::
 difftool.prompt::
 	Prompt before each invocation of the diff tool.
 
+fastimport.unpackLimit::
+	If the number of objects imported by linkgit:git-fast-import[1]
+	is below this limit, then the objects will be unpacked into
+	loose object files.  However if the number of imported objects
+	equals or exceeds this limit then the pack will be stored as a
+	pack.  Storing the pack from a fast-import can make the import
+	operation complete faster, especially on slow filesystems.  If
+	not set, the value of `transfer.unpackLimit` is used instead.
+
 fetch.recurseSubmodules::
 	This option can be either set to a boolean value or to 'on-demand'.
 	Setting it to a boolean changes the behavior of fetch and pull to
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 66910aa..644df99 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -136,6 +136,8 @@ Performance and Compression Tuning
 	Maximum size of each output packfile.
 	The default is unlimited.
 
+fastimport.unpackLimit::
+	See linkgit:git-config[1]
 
 Performance
 -----------
diff --git a/fast-import.c b/fast-import.c
index 9fc7093..381d3a0 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -166,6 +166,7 @@ Format of STDIN stream:
 #include "quote.h"
 #include "exec_cmd.h"
 #include "dir.h"
+#include "run-command.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -282,6 +283,7 @@ struct recent_command {
 /* Configured limits on output */
 static unsigned long max_depth = 10;
 static off_t max_packsize;
+static int unpack_limit = 100;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
@@ -950,6 +952,22 @@ static void unkeep_all_packs(void)
 	}
 }
 
+static int loosen_small_pack(const struct packed_git *p)
+{
+	struct child_process unpack = CHILD_PROCESS_INIT;
+
+	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+		die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+	unpack.in = p->pack_fd;
+	unpack.git_cmd = 1;
+	unpack.stdout_to_stderr = 1;
+	argv_array_push(&unpack.args, "unpack-objects");
+	argv_array_push(&unpack.args, "-q");
+
+	return run_command(&unpack);
+}
+
 static void end_packfile(void)
 {
 	static int running;
@@ -972,6 +990,12 @@ static void end_packfile(void)
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
 				    pack_data->pack_name, object_count,
 				    cur_pack_sha1, pack_size);
+
+		if (object_count <= unpack_limit) {
+			if (loosen_small_pack(pack_data) == 0)
+				goto discard_pack;
+		}
+
 		close(pack_data->pack_fd);
 		idx_name = keep_pack(create_index());
 
@@ -1002,6 +1026,7 @@ static void end_packfile(void)
 		pack_id++;
 	}
 	else {
+discard_pack:
 		close(pack_data->pack_fd);
 		unlink_or_warn(pack_data->pack_name);
 	}
@@ -3317,6 +3342,7 @@ static void parse_option(const char *option)
 static void git_pack_config(void)
 {
 	int indexversion_value;
+	int limit;
 	unsigned long packsizelimit_value;
 
 	if (!git_config_get_ulong("pack.depth", &max_depth)) {
@@ -3341,6 +3367,11 @@ static void git_pack_config(void)
 	if (!git_config_get_ulong("pack.packsizelimit", &packsizelimit_value))
 		max_packsize = packsizelimit_value;
 
+	if (!git_config_get_int("fastimport.unpacklimit", &limit))
+		unpack_limit = limit;
+	else if (!git_config_get_int("transfer.unpacklimit", &limit))
+		unpack_limit = limit;
+
 	git_config(git_default_config, NULL);
 }
 
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 25bb60b..e6a2b8a 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -52,6 +52,7 @@ echo "$@"'
 ###
 
 test_expect_success 'empty stream succeeds' '
+	git config fastimport.unpackLimit 0 &&
 	git fast-import </dev/null
 '
 
@@ -2675,6 +2676,7 @@ test_expect_success 'R: blob bigger than threshold' '
 	echo >>input &&
 
 	test_create_repo R &&
+	git --git-dir=R/.git config fastimport.unpackLimit 0 &&
 	git --git-dir=R/.git fast-import --big-file-threshold=1 <input
 '
 
diff --git a/t/t9302-fast-import-unpack-limit.sh b/t/t9302-fast-import-unpack-limit.sh
new file mode 100755
index 0000000..0f686d2
--- /dev/null
+++ b/t/t9302-fast-import-unpack-limit.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+test_description='test git fast-import unpack limit'
+. ./test-lib.sh
+
+test_expect_success 'create loose objects on import' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	initial
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 0
+'
+
+test_expect_success 'bigger packs are preserved' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	incremental should create a pack
+	COMMIT
+	from refs/heads/master^0
+
+	commit refs/heads/branch
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	branch
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_done
-- 
EW


* Re: [PATCH v2] fast-import: implement unpack limit
From: Junio C Hamano @ 2016-04-24 19:18 UTC (permalink / raw)
  To: Eric Wong; +Cc: Jeff King, git, Jonathan Nieder, Shawn Pearce

Eric Wong <normalperson@yhbt.net> writes:

> +static int loosen_small_pack(const struct packed_git *p)
> +{
> +	struct child_process unpack = CHILD_PROCESS_INIT;
> +
> +	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
> +		die_errno("Failed seeking to start of '%s'", p->pack_name);
> +
> +	unpack.in = p->pack_fd;
> +	unpack.git_cmd = 1;
> +	unpack.stdout_to_stderr = 1;
> +	argv_array_push(&unpack.args, "unpack-objects");
> +	argv_array_push(&unpack.args, "-q");
> +
> +	return run_command(&unpack);
> +}

So you have fd open to a *.pack file you have been writing, you are
going to close and discard it after you return from here, so you
just seek the fd to the beginning and hand it to unpack-objects.

And this works if you haven't finalized the *.pack file with the
corresponding *.idx (otherwise unpack-objects would silently ignore
objects found in the *.idx file); and obviously you haven't, and you
are not going to create an *.idx file for this.

Looks good.  I haven't thought if "-q" is appropriate or not though.
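
(For reference, a rough standalone equivalent of what that child
process does; "tiny.pack" is just a placeholder name:)

	# read a pack from stdin and explode it into loose objects,
	# with -q suppressing the progress output
	git unpack-objects -q <tiny.pack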

> @@ -972,6 +990,12 @@ static void end_packfile(void)
>  		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
>  				    pack_data->pack_name, object_count,
>  				    cur_pack_sha1, pack_size);
> +
> +		if (object_count <= unpack_limit) {
> +			if (loosen_small_pack(pack_data) == 0)
> +				goto discard_pack;
> +		}

"if (!loosen_small_pack(pack_data))" would be more idiomatic, but
the logic is very clear here.  We haven't created the idx, we skip
the part that creates the idx and instead jump directly to the part
that closes and unlinks it.

I like this change.  Thanks.

>  		close(pack_data->pack_fd);
>  		idx_name = keep_pack(create_index());
>  
> @@ -1002,6 +1026,7 @@ static void end_packfile(void)
>  		pack_id++;
>  	}
>  	else {
> +discard_pack:
>  		close(pack_data->pack_fd);
>  		unlink_or_warn(pack_data->pack_name);
>  	}


* Re: [PATCH v2] fast-import: implement unpack limit
From: Eric Wong @ 2016-04-24 20:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git, Jonathan Nieder, Shawn Pearce

Junio C Hamano <gitster@pobox.com> wrote:
> Eric Wong <normalperson@yhbt.net> writes:
> > +	argv_array_push(&unpack.args, "unpack-objects");
> > +	argv_array_push(&unpack.args, "-q");
> > +
> > +	return run_command(&unpack);
 
> Looks good.  I haven't thought if "-q" is appropriate or not though.

Oops, I think tying it to the existing --quiet option in PATCH v3
would be good.

> > @@ -972,6 +990,12 @@ static void end_packfile(void)
> >  		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
> >  				    pack_data->pack_name, object_count,
> >  				    cur_pack_sha1, pack_size);
> > +
> > +		if (object_count <= unpack_limit) {
> > +			if (loosen_small_pack(pack_data) == 0)
> > +				goto discard_pack;
> > +		}
> 
> "if (!loosen_small_pack(pack_data))" would be more idiomatic, but
> the logic is very clear here.  We haven't created the idx, we skip
> the part that creates the idx and instead jump directly to the part
> that closes and unlinks it.

I was on the fence about "!" vs "== 0" vs something else, too;
and I get thrown off by things like "!strcmp" in C all the time.
I can change it to "if (!loosen_small_pack(pack_data))" in v3
(probably in a day or so, in case there are further comments).


* [PATCH v3] fast-import: implement unpack limit
From: Eric Wong @ 2016-04-25 21:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git, Jonathan Nieder, Shawn Pearce

With many incremental imports, small packs become highly
inefficient due to the need to readdir scan and load many
indices to locate even a single object.  Frequent repacking and
consolidation may be prohibitively expensive in terms of disk
I/O, especially in large repositories where the initial packs
were aggressively optimized and marked with .keep files.

In those cases, users may be better served with loose objects
and relying on "git gc --auto".

This changes the default behavior of fast-import for small
imports found in test cases, so adjustments to t9300 were
necessary.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
  v2 changes: implemented as a git-config directive
  v3 changes:
  * honor --quiet for fast-import as -q for unpack-objects
  * minor style adjustment when calling loosen_small_pack
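
  A small configuration sketch mirroring what the adjusted tests do
  (the values are just examples):

	# keep the pre-patch always-pack behaviour, as the adjusted
	# t9300 tests do
	git config fastimport.unpackLimit 0

	# or pick a threshold for a single import, as t9302 does
	git -c fastimport.unpackLimit=2 fast-import --done <input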

 Documentation/config.txt            |  9 +++++++
 Documentation/git-fast-import.txt   |  2 ++
 fast-import.c                       | 32 +++++++++++++++++++++++++
 t/t9300-fast-import.sh              |  2 ++
 t/t9302-fast-import-unpack-limit.sh | 48 +++++++++++++++++++++++++++++++++++++
 5 files changed, 93 insertions(+)
 create mode 100755 t/t9302-fast-import-unpack-limit.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 42d2b50..3d8bc97 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1154,6 +1154,15 @@ difftool.<tool>.cmd::
 difftool.prompt::
 	Prompt before each invocation of the diff tool.
 
+fastimport.unpackLimit::
+	If the number of objects imported by linkgit:git-fast-import[1]
+	is below this limit, then the objects will be unpacked into
+	loose object files.  However if the number of imported objects
+	equals or exceeds this limit then the pack will be stored as a
+	pack.  Storing the pack from a fast-import can make the import
+	operation complete faster, especially on slow filesystems.  If
+	not set, the value of `transfer.unpackLimit` is used instead.
+
 fetch.recurseSubmodules::
 	This option can be either set to a boolean value or to 'on-demand'.
 	Setting it to a boolean changes the behavior of fetch and pull to
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 66910aa..644df99 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -136,6 +136,8 @@ Performance and Compression Tuning
 	Maximum size of each output packfile.
 	The default is unlimited.
 
+fastimport.unpackLimit::
+	See linkgit:git-config[1]
 
 Performance
 -----------
diff --git a/fast-import.c b/fast-import.c
index 9fc7093..4fb464c 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -166,6 +166,7 @@ Format of STDIN stream:
 #include "quote.h"
 #include "exec_cmd.h"
 #include "dir.h"
+#include "run-command.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -282,6 +283,7 @@ struct recent_command {
 /* Configured limits on output */
 static unsigned long max_depth = 10;
 static off_t max_packsize;
+static int unpack_limit = 100;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
@@ -950,6 +952,23 @@ static void unkeep_all_packs(void)
 	}
 }
 
+static int loosen_small_pack(const struct packed_git *p)
+{
+	struct child_process unpack = CHILD_PROCESS_INIT;
+
+	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+		die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+	unpack.in = p->pack_fd;
+	unpack.git_cmd = 1;
+	unpack.stdout_to_stderr = 1;
+	argv_array_push(&unpack.args, "unpack-objects");
+	if (!show_stats)
+		argv_array_push(&unpack.args, "-q");
+
+	return run_command(&unpack);
+}
+
 static void end_packfile(void)
 {
 	static int running;
@@ -972,6 +991,12 @@ static void end_packfile(void)
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
 				    pack_data->pack_name, object_count,
 				    cur_pack_sha1, pack_size);
+
+		if (object_count <= unpack_limit) {
+			if (!loosen_small_pack(pack_data))
+				goto discard_pack;
+		}
+
 		close(pack_data->pack_fd);
 		idx_name = keep_pack(create_index());
 
@@ -1002,6 +1027,7 @@ static void end_packfile(void)
 		pack_id++;
 	}
 	else {
+discard_pack:
 		close(pack_data->pack_fd);
 		unlink_or_warn(pack_data->pack_name);
 	}
@@ -3317,6 +3343,7 @@ static void parse_option(const char *option)
 static void git_pack_config(void)
 {
 	int indexversion_value;
+	int limit;
 	unsigned long packsizelimit_value;
 
 	if (!git_config_get_ulong("pack.depth", &max_depth)) {
@@ -3341,6 +3368,11 @@ static void git_pack_config(void)
 	if (!git_config_get_ulong("pack.packsizelimit", &packsizelimit_value))
 		max_packsize = packsizelimit_value;
 
+	if (!git_config_get_int("fastimport.unpacklimit", &limit))
+		unpack_limit = limit;
+	else if (!git_config_get_int("transfer.unpacklimit", &limit))
+		unpack_limit = limit;
+
 	git_config(git_default_config, NULL);
 }
 
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 25bb60b..e6a2b8a 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -52,6 +52,7 @@ echo "$@"'
 ###
 
 test_expect_success 'empty stream succeeds' '
+	git config fastimport.unpackLimit 0 &&
 	git fast-import </dev/null
 '
 
@@ -2675,6 +2676,7 @@ test_expect_success 'R: blob bigger than threshold' '
 	echo >>input &&
 
 	test_create_repo R &&
+	git --git-dir=R/.git config fastimport.unpackLimit 0 &&
 	git --git-dir=R/.git fast-import --big-file-threshold=1 <input
 '
 
diff --git a/t/t9302-fast-import-unpack-limit.sh b/t/t9302-fast-import-unpack-limit.sh
new file mode 100755
index 0000000..0f686d2
--- /dev/null
+++ b/t/t9302-fast-import-unpack-limit.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+test_description='test git fast-import unpack limit'
+. ./test-lib.sh
+
+test_expect_success 'create loose objects on import' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	initial
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 0
+'
+
+test_expect_success 'bigger packs are preserved' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	incremental should create a pack
+	COMMIT
+	from refs/heads/master^0
+
+	commit refs/heads/branch
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	branch
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_done
-- 
EW
