* [PATCH v2] Custom compression levels for objects and packs @ 2007-05-08 22:38 Dana How 2007-05-08 23:56 ` Junio C Hamano ` (2 more replies) 0 siblings, 3 replies; 23+ messages in thread From: Dana How @ 2007-05-08 22:38 UTC (permalink / raw) To: Junio C Hamano; +Cc: Git Mailing List, danahow Add config variables pack.compression and core.loosecompression . Loose objects will be compressed using level isset(core.loosecompression) ? core.loosecompression : isset(core.compression) ? core.compression : Z_BEST_SPEED and objects in packs will be compressed using level isset(pack.compression) ? pack.compression : isset(core.compression) ? core.compression : Z_DEFAULT_COMPRESSION pack-objects also accepts --compression=N which overrides the latter expression. This applies on top of the git-repack --max-pack-size patchset. Signed-off-by: Dana L. How <danahow@gmail.com> --- builtin-pack-objects.c | 32 +++++++++++++++++++++++++++++++- cache.h | 2 ++ config.c | 18 +++++++++++++++++- environment.c | 4 +++- 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 8824793..e80a1d6 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -70,6 +70,8 @@ static uint32_t pack_size_limit; static int pack_to_stdout; static int num_preferred_base; static struct progress progress_state; +static int pack_compression_level = Z_DEFAULT_COMPRESSION; +static int pack_compression_seen; /* * The object names in objects array are hashed with this hashtable, @@ -444,6 +446,10 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. 
*/ + /* differing core & pack compression when loose object -> must recompress */ + if (!entry->in_pack && pack_compression_level != zlib_compression_level) + to_reuse = 0; + else if (!entry->in_pack && !entry->delta) { unsigned char *map; unsigned long mapsize; @@ -492,7 +498,7 @@ static unsigned long write_object(struct sha1file *f, } /* compress the data to store and put compressed length in datalen */ memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, zlib_compression_level); + deflateInit(&stream, pack_compression_level); maxsize = deflateBound(&stream, size); out = xmalloc(maxsize); /* Compress it */ @@ -1624,6 +1630,16 @@ static int git_pack_config(const char *k, const char *v) window = git_config_int(k, v); return 0; } + if (!strcmp(k, "pack.compression")) { + int level = git_config_int(k, v); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack compression level %d", level); + pack_compression_level = level; + pack_compression_seen = 1; + return 0; + } return git_default_config(k, v); } @@ -1734,6 +1750,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) rp_ac = 2; git_config(git_pack_config); + if (!pack_compression_seen && core_compression_seen) + pack_compression_level = core_compression_level; progress = isatty(2); for (i = 1; i < argc; i++) { @@ -1761,6 +1779,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) usage(pack_usage); continue; } + if (!prefixcmp(arg, "--compression=")) { + char *end; + int level = strtoul(arg+14, &end, 0); + if (!arg[14] || *end) + usage(pack_usage); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack compression level %d", level); + pack_compression_level = level; + continue; + } if (!prefixcmp(arg, "--window=")) { char *end; window = strtoul(arg+9, &end, 0); diff --git a/cache.h b/cache.h index 8e76152..2b3f359 100644 --- a/cache.h 
+++ b/cache.h @@ -283,6 +283,8 @@ extern int warn_ambiguous_refs; extern int shared_repository; extern const char *apply_default_whitespace; extern int zlib_compression_level; +extern int core_compression_level; +extern int core_compression_seen; extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; diff --git a/config.c b/config.c index 70d1055..5627ed6 100644 --- a/config.c +++ b/config.c @@ -12,6 +12,8 @@ static FILE *config_file; static const char *config_file_name; static int config_linenr; +static int zlib_compression_seen; + static int get_next_char(void) { int c; @@ -304,13 +306,27 @@ int git_default_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.compression")) { + if (!strcmp(var, "core.loosecompression")) { int level = git_config_int(var, value); if (level == -1) level = Z_DEFAULT_COMPRESSION; else if (level < 0 || level > Z_BEST_COMPRESSION) die("bad zlib compression level %d", level); zlib_compression_level = level; + zlib_compression_seen = 1; + return 0; + } + + if (!strcmp(var, "core.compression")) { + int level = git_config_int(var, value); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad zlib compression level %d", level); + core_compression_level = level; + core_compression_seen = 1; + if (!zlib_compression_seen) + zlib_compression_level = level; return 0; } diff --git a/environment.c b/environment.c index 2231659..b7aeb1a 100644 --- a/environment.c +++ b/environment.c @@ -24,7 +24,9 @@ const char *git_commit_encoding; const char *git_log_output_encoding; int shared_repository = PERM_UMASK; const char *apply_default_whitespace; -int zlib_compression_level = Z_DEFAULT_COMPRESSION; +int zlib_compression_level = Z_BEST_SPEED; +int core_compression_level; +int core_compression_seen; size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE; size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT; 
size_t delta_base_cache_limit = 16 * 1024 * 1024; -- 1.5.2.rc0.787.g0014 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-08 22:38 [PATCH v2] Custom compression levels for objects and packs Dana How @ 2007-05-08 23:56 ` Junio C Hamano 2007-05-09 0:16 ` Nicolas Pitre 2007-05-09 0:25 ` Dana How 2007-05-09 0:30 ` Petr Baudis 2007-05-09 13:56 ` Theodore Tso 2 siblings, 2 replies; 23+ messages in thread From: Junio C Hamano @ 2007-05-08 23:56 UTC (permalink / raw) To: Dana How; +Cc: Git Mailing List Dana How <danahow@gmail.com> writes: > Add config variables pack.compression and core.loosecompression . > Loose objects will be compressed using level > isset(core.loosecompression) ? core.loosecompression : > isset(core.compression) ? core.compression : Z_BEST_SPEED > and objects in packs will be compressed using level > isset(pack.compression) ? pack.compression : > isset(core.compression) ? core.compression : Z_DEFAULT_COMPRESSION > pack-objects also accepts --compression=N which > overrides the latter expression. Do you think the above is readable? Compression level for loose objects is controlled by variable core.loosecompression (or core.compression, if the former is missing), and defaults to best-speed. or something like that? > This applies on top of the git-repack --max-pack-size patchset. Hmph, that makes the --max-pack-size patchset take this more trivial and straightforward improvements hostage. In general, I'd prefer more elaborate ones based on less questionable series. > @@ -444,6 +446,10 @@ static unsigned long write_object(struct sha1file *f, > * and we do not need to deltify it. > */ > > + /* differing core & pack compression when loose object -> must recompress */ > + if (!entry->in_pack && pack_compression_level != zlib_compression_level) > + to_reuse = 0; > + else I am not sure if that is worth it, as you do not know if the loose object you are looking at were compressed with the current settings. 
> diff --git a/cache.h b/cache.h > index 8e76152..2b3f359 100644 > --- a/cache.h > +++ b/cache.h > @@ -283,6 +283,8 @@ extern int warn_ambiguous_refs; > extern int shared_repository; > extern const char *apply_default_whitespace; > extern int zlib_compression_level; > +extern int core_compression_level; > +extern int core_compression_seen; Could we somehow remove _seen? Perhaps by initializing the _level to -1? > +int core_compression_level; > +int core_compression_seen; Same here. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-08 23:56 ` Junio C Hamano @ 2007-05-09 0:16 ` Nicolas Pitre 2007-05-09 0:29 ` Dana How 2007-05-09 0:25 ` Dana How 1 sibling, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 0:16 UTC (permalink / raw) To: Junio C Hamano; +Cc: Dana How, Git Mailing List On Tue, 8 May 2007, Junio C Hamano wrote: > Dana How <danahow@gmail.com> writes: > > > @@ -444,6 +446,10 @@ static unsigned long write_object(struct sha1file *f, > > * and we do not need to deltify it. > > */ > > > > + /* differing core & pack compression when loose object -> must recompress */ > > + if (!entry->in_pack && pack_compression_level != zlib_compression_level) > > + to_reuse = 0; > > + else > > I am not sure if that is worth it, as you do not know if the > loose object you are looking at were compressed with the current > settings. I was about to make the same comment. > > diff --git a/cache.h b/cache.h > > index 8e76152..2b3f359 100644 > > --- a/cache.h > > +++ b/cache.h > > @@ -283,6 +283,8 @@ extern int warn_ambiguous_refs; > > extern int shared_repository; > > extern const char *apply_default_whitespace; > > extern int zlib_compression_level; > > +extern int core_compression_level; > > +extern int core_compression_seen; > > Could we somehow remove _seen? Perhaps by initializing the > _level to -1? -1 is a valid value for compression. Actually it is equivalent to Z_DEFAULT_COMPRESSION. If we want the fallback logic to work, at some point we must remember if the current value is the default or if it is the result of an explicit config option. Nicolas ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 0:16 ` Nicolas Pitre @ 2007-05-09 0:29 ` Dana How 2007-05-09 1:03 ` Nicolas Pitre 0 siblings, 1 reply; 23+ messages in thread From: Dana How @ 2007-05-09 0:29 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow On 5/8/07, Nicolas Pitre <nico@cam.org> wrote: > On Tue, 8 May 2007, Junio C Hamano wrote: > > Dana How <danahow@gmail.com> writes: > > > + /* differing core & pack compression when loose object -> must recompress */ > > > + if (!entry->in_pack && pack_compression_level != zlib_compression_level) > > > + to_reuse = 0; > > > + else > > I am not sure if that is worth it, as you do not know if the > > loose object you are looking at were compressed with the current > > settings. > I was about to make the same comment. I was bitten by *not* doing this. Please see the more verbose reply to Junio's comment. > > Could we somehow remove _seen? Perhaps by initializing the > > _level to -1? > > -1 is a valid value for compression. Actually it is equivalent to > Z_DEFAULT_COMPRESSION. > > If we want the fallback logic to work, at some point we must remember if > the current value is the default or if it is the result of an explicit > config option. I can leave as-is, or use a magic value like -99 and depend on it not colliding with values in zlib.h. Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 0:29 ` Dana How @ 2007-05-09 1:03 ` Nicolas Pitre 2007-05-09 6:46 ` Dana How 0 siblings, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 1:03 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List On Tue, 8 May 2007, Dana How wrote: > On 5/8/07, Nicolas Pitre <nico@cam.org> wrote: > > On Tue, 8 May 2007, Junio C Hamano wrote: > > If we want the fallback logic to work, at some point we must remember if > > the current value is the default or if it is the result of an explicit > > config option. > I can leave as-is, or use a magic value like -99 and > depend on it not colliding with values in zlib.h. And where would you set those variables to a sensible default in the absence of any config option? Nicolas ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 1:03 ` Nicolas Pitre @ 2007-05-09 6:46 ` Dana How 2007-05-09 7:13 ` Junio C Hamano 0 siblings, 1 reply; 23+ messages in thread From: Dana How @ 2007-05-09 6:46 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow On 5/8/07, Nicolas Pitre <nico@cam.org> wrote: > On Tue, 8 May 2007, Dana How wrote: > > On 5/8/07, Nicolas Pitre <nico@cam.org> wrote: > > > On Tue, 8 May 2007, Junio C Hamano wrote: > > > If we want the fallback logic to work, at some point we must remember if > > > the current value is the default or if it is the result of an explicit > > > config option. > > I can leave as-is, or use a magic value like -99 and > > depend on it not colliding with values in zlib.h. > And where would you set those variables to a sensible default in the > absence of any config option? This is why I used the _seen variables, but they are not necessary -- see 3rd option below. Unfortunately we agreed a day or two ago to use a config rule like used_value = isset(var1) ? var1 : isset(var2) ? var2 : DEFAULT. This doesn't interact well with each variable being processed completely independently in git_config() and the callbacks it calls. The isset() value is "out-of-band"; either store it in the _seen variables, or some special value in used_value . Which makes the most sense: * Leave _seen as-is; * Move pack.compression recognition into config.c which means the _seen variables would all be local to config.c; * Use some special value, and if still present replace it with the default at the end of git_config() using extra code; * Change the config rule to something simpler. I like the 2nd and the 4th. You didn't like the 4th. Shall I change to the 2nd? Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 6:46 ` Dana How @ 2007-05-09 7:13 ` Junio C Hamano 0 siblings, 0 replies; 23+ messages in thread From: Junio C Hamano @ 2007-05-09 7:13 UTC (permalink / raw) To: Dana How; +Cc: Nicolas Pitre, Git Mailing List "Dana How" <danahow@gmail.com> writes: > This doesn't interact well with each variable being processed > completely independently in git_config() and the callbacks it calls. > The isset() value is "out-of-band"; either store it in the _seen > variables, or some special value in used_value . > > Which makes the most sense: > * Leave _seen as-is; > * Move pack.compression recognition into config.c which means > the _seen variables would all be local to config.c; > * Use some special value, and if still present replace it with the default > at the end of git_config() using extra code; > * Change the config rule to something simpler. > > I like the 2nd and the 4th. You didn't like the 4th. > Shall I change to the 2nd? FWIW, I am Ok with (1). ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-08 23:56 ` Junio C Hamano 2007-05-09 0:16 ` Nicolas Pitre @ 2007-05-09 0:25 ` Dana How 2007-05-09 1:23 ` Nicolas Pitre 2007-05-09 5:59 ` [PATCH v2] Custom compression levels for objects and packs Junio C Hamano 1 sibling, 2 replies; 23+ messages in thread From: Dana How @ 2007-05-09 0:25 UTC (permalink / raw) To: Junio C Hamano; +Cc: Git Mailing List, danahow On 5/8/07, Junio C Hamano <junkio@cox.net> wrote: > Dana How <danahow@gmail.com> writes: > > Add config variables pack.compression and core.loosecompression . > > Loose objects will be compressed using level > > isset(core.loosecompression) ? core.loosecompression : > > isset(core.compression) ? core.compression : Z_BEST_SPEED > > and objects in packs will be compressed using level > > isset(pack.compression) ? pack.compression : > > isset(core.compression) ? core.compression : Z_DEFAULT_COMPRESSION > > pack-objects also accepts --compression=N which > > overrides the latter expression. > > Do you think the above is readable? > Compression level for loose objects is controlled by variable > core.loosecompression (or core.compression, if the former is > missing), and defaults to best-speed. > or something like that? Your phrasing is much better. > > This applies on top of the git-repack --max-pack-size patchset. > Hmph, that makes the --max-pack-size patchset take this more > trivial and straightforward improvements hostage. In general, > I'd prefer more elaborate ones based on less questionable > series. The max-pack-size and pack.compression patches touch the same lines. I thought my options were: * Submit independently and make you merge; or * Make one precede the other. Since max-pack-size has been out there since April 4 and the first acceptable version was May 1 (suggested by 0 comments), I didn't realize it was a "questionable series". I think it should be straightforward for me to re-submit this based on current master. 
> > + /* differing core & pack compression when loose object -> must recompress */ > > + if (!entry->in_pack && pack_compression_level != zlib_compression_level) > > + to_reuse = 0; > > + else > I am not sure if that is worth it, as you do not know if the > loose object you are looking at were compressed with the current > settings. You do not know for certain, that is correct. However, config settings setting unequal compression levels signal that you care differently about the two cases. (For me, I want the compression investment to correspond to the expected lifetime of the file.) Also, *if* we have the knobs we want in the config file, I don't think we're going to be changing these settings all that often. If I didn't have this check forcing recompression in the pack, then in the absence of deltification each object would enter the pack by being copied (in the preceding code block) and pack.compression would have little effect. I actually experienced this the very first time I imported a large dataset into git (I was trying to achieve the effect of this patch by changing core.compression dynamically, and was a bit mystified for a while by the result). Thus, if core.loosecompression is set to speed up git-add, I should take the time to recompress the object when packing if pack.compression is different (of course the hit of not doing so will be lessened by deltification which forces a new compression). > > diff --git a/cache.h b/cache.h > > index 8e76152..2b3f359 100644 > > --- a/cache.h > > +++ b/cache.h > > @@ -283,6 +283,8 @@ extern int warn_ambiguous_refs; > > extern int shared_repository; > > extern const char *apply_default_whitespace; > > extern int zlib_compression_level; > > +extern int core_compression_level; > > +extern int core_compression_seen; > > Could we somehow remove _seen? Perhaps by initializing the > _level to -1? > > > +int core_compression_level; > > +int core_compression_seen; > > Same here. I agree completely. 
But, what magic value should I use to initialize the _level variables so I know they are not set? All valid settings come from zlib.h through #define's but there is no "invalid" defined. Maybe I'll use -99. Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 0:25 ` Dana How @ 2007-05-09 1:23 ` Nicolas Pitre 2007-05-09 9:21 ` Dana How 2007-05-09 5:59 ` [PATCH v2] Custom compression levels for objects and packs Junio C Hamano 1 sibling, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 1:23 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List On Tue, 8 May 2007, Dana How wrote: > Since max-pack-size has been out there since April 4 and > the first acceptable version was May 1 (suggested by 0 comments), > I didn't realize it was a "questionable series". > > I think it should be straightforward for me to re-submit this > based on current master. Since this patch is simpler it could be merged much faster, before the pack limit series. > > > + /* differing core & pack compression when loose object -> must > > recompress */ > > > + if (!entry->in_pack && pack_compression_level != > > zlib_compression_level) > > > + to_reuse = 0; > > > + else > > I am not sure if that is worth it, as you do not know if the > > loose object you are looking at were compressed with the current > > settings. > You do not know for certain, that is correct. However, config > settings setting unequal compression levels signal that you > care differently about the two cases. (For me, I want the > compression investment to correspond to the expected lifetime of the file.) > Also, *if* we have the knobs we want in the config file, > I don't think we're going to be changing these settings all that often. > > If I didn't have this check forcing recompression in the pack, > then in the absence of deltification each object would enter the pack > by being copied (in the preceding code block) and pack.compression > would have little effect. 
I actually experienced this the very first > time I imported a large dataset into git (I was trying to achieve the > effect of this patch by changing core.compression dynamically, and > was a bit mystified for a while by the result). > > Thus, if core.loosecompression is set to speed up git-add, I should > take the time to recompress the object when packing if pack.compression > is different (of course the hit of not doing so will be lessened by > deltification > which forces a new compression). Right. And this also depends whether or not you have core.legacyheaders set to false or not. And the whole purpose for setting core.legacyheaders is exactly to allow for loose objects to be copied straight into the pack. This should have priority over mismatched compression levels IMHO. Also, when repacking, delta reuse does not recompress objects for the same reason, regardless of the compression level used when they were compressed initially. Same argument goes for delta depth. So if you really want to ensure a compression level on the whole pack, you'll have to use -f with git-repack. Or leave core.legacyheaders unset. Nicolas ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 1:23 ` Nicolas Pitre @ 2007-05-09 9:21 ` Dana How 2007-05-09 15:27 ` Nicolas Pitre 0 siblings, 1 reply; 23+ messages in thread From: Dana How @ 2007-05-09 9:21 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow On 5/8/07, Nicolas Pitre <nico@cam.org> wrote: > On Tue, 8 May 2007, Dana How wrote: > > I think it should be straightforward for me to re-submit this > > based on current master. > Since this patch is simpler it could be merged much faster, before the > pack limit series. Yes, I will do "custom compression" patch first. > > > > + /* differing core & pack compression when loose object -> must > > > recompress */ > > > I am not sure if that is worth it, as you do not know if the > > > loose object you are looking at were compressed with the current > > > settings. > > You do not know for certain, that is correct. However, config > > settings setting unequal compression levels signal that you > > care differently about the two cases. (For me, I want the > > compression investment to correspond to the expected lifetime of the file.) > > Also, *if* we have the knobs we want in the config file, > > I don't think we're going to be changing these settings all that often. > > > > If I didn't have this check forcing recompression in the pack, > > then in the absence of deltification each object would enter the pack > > by being copied (in the preceding code block) and pack.compression > > would have little effect. I actually experienced this the very first > > time I imported a large dataset into git (I was trying to achieve the > > effect of this patch by changing core.compression dynamically, and > > was a bit mystified for a while by the result). 
> > > > Thus, if core.loosecompression is set to speed up git-add, I should > > take the time to recompress the object when packing if pack.compression > > is different (of course the hit of not doing so will be lessened by > > deltification > > which forces a new compression). > > Right. And this also depends whether or not you have core.legacyheaders > set to false or not. > > And the whole purpose for setting core.legacyheaders is exactly to allow > for loose objects to be copied straight into the pack. This should have > priority over mismatched compression levels IMHO. OK, I got really confused here, so I looked over the code, and figured out 2 causes for my confusion. (1) core.legacyheaders controls use_legacy_headers, which defaults to 1. So currently all loose objects are in legacy format and the code block I spoke of doesn't trigger [without a config setting]. I didn't realize legacy headers were still being produced (misled by the name!). (2) I read your "setting core.legacyheaders" as followed by TRUE, but you meant FALSE, which is not the default. I also read that 1 year after 1.4.2, the default for core.legacyheaders is going to change to FALSE. I think our discussion should assume this has happened. So let's assume FALSE in the following. The point of that is that such a FALSE setting can't be assumed to have any special intent; it will be the default. [Everything I write here boils down to only one question, which I repeat at the end.] Data gets into a pack in these ways: 1. Loose object copied in; 2. Loose object newly deltified; 3. Packed object to be copied; 4. Packed object to be newly deltified; 5. Packed deltified object we can't re-use; 6. Packed deltified object we can re-use. ["copied" includes recompressed.] In (2), (4), and (5), pack.compression will always be newly used. If pack.compression doesn't change, this means (6) will be using pack.compression since it comes from (2) or (4). 
So if I "guarantee" that (1) uses pack.compression, (3) will as well, meaning everything in the pack will be at pack.compression. Thus if pack.compression != core.loosecompression takes precedence over core.legacyheaders = false, then for pack.compression constant we get all 6 cases at level pack.compression. If core.legacyheaders = false takes precedence as you suggest, then all undeltified objects (20%?) will be stuck at core.loosecompression [since I see no way to sensibly re-apply compression to something copied pack-to-pack]. I think this is inconsistent with what a pack.compression != core.loosecompression setting is telling us. My arguments have 2 biases. (a) I assume pack.compression stays constant, and if it changes, I see little value in worrying about "forcing" all objects in a new pack, some of which might be copied from an old pack, to be at the same compression level. (b) I focused first on packing to disk, where a constant pack.compression makes sense. For packing to stdout (pack transfer), especially on a popular, hence loaded, repository server, we may want to use --compress=N to crank down on effort spent compressing new deltas [2+4] (or loose objects [1]) (or unusable deltas [5]). This still lets better compressed objects flow through in the other cases [3+6]. So for one-use packs, a mix of compression levels could be desirable and is achievable with --compression=N if/when so. > Also, when repacking, delta reuse does not recompress objects for the > same reason, regardless of the compression level used when they were > compressed initially. Same argument goes for delta depth. Delta reuse doesn't need recompression. For pack.compression constant, they will already be at the correct level. I think we agree on behavior here for different reasons. > So if you really want to ensure a compression level on the whole pack, > you'll have to use -f with git-repack. Or leave core.legacyheaders > unset. 
-f would be needed if you were in the practice of changing pack.compression, yes. So, after covering all these cases, have I convinced you that recompression takes precedence over legacyheaders = FALSE? Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 9:21 ` Dana How @ 2007-05-09 15:27 ` Nicolas Pitre 2007-05-09 16:26 ` Junio C Hamano 0 siblings, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 15:27 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List On Wed, 9 May 2007, Dana How wrote: > OK, I got really confused here, so I looked over the code, > and figured out 2 causes for my confusion. > (1) core.legacyheaders controls use_legacy_headers, which defaults to 1. > So currently all loose objects are in legacy format and the code block > I spoke of doesn't trigger [without a config setting]. I didn't realize > legacy headers were still being produced (mislead by the name!). > (2) I read your "setting core.legacyheaders" as followed by TRUE, > but you meant FALSE, which is not the default. > > I also read that 1 year after 1.4.2, the default for core.legacyheaders is > going > to change to FALSE. I think our discussion should assume this has > happened. <tangential comment> Now that we encourage and actively preserve objects in a packed form more aggressively than we did at the time core.legacyheaders was introduced, I really wonder if this option is still worth it. Because the packing of loose objects has to go through the delta match loop anyway, and since most of them should end up being deltified in most cases, there is really little advantage to have this parallel loose object format as the CPU savings it might provide is rather lost in the noise in the end. So I suggest that we get rid of core.legacyheaders, preserve the legacy format as the only writable loose object format and deprecate the other one to keep things simpler. Thoughts? </tangential comment> > So let's assume FALSE in the following. The point of that is that > such a FALSE setting can't be assumed to have any special intent; it > will be the default. 
> > [Everything I write here boils down to only one question, > which I repeat at the end.] > > Data gets into a pack in these ways: > 1. Loose object copied in; > 2. Loose object newly deltified; > 3. Packed object to be copied; > 4. Packed object to be newly deltified; > 5. Packed deltified object we can't re-use; > 6. Packed deltified object we can re-use. > ["copied" includes recompressed.] I think you forget "packed undeltified objects we can reuse". > In (2), (4), and (5), pack.compression will always be newly used. > If pack.compression doesn't change, this means (6) > will be using pack.compression since it comes from (2) or (4). > So if I "guarantee" that (1) uses pack.compression, > (3) will as well, meaning everything in the pack will be > at pack.compression. > > Thus if pack.compression != core.loosecompression takes precedence > over core.legacyheaders = false, then for pack.compression constant > we get all 6 cases at level pack.compression. If core.legacyheaders = > false takes precedence as you suggest, then all undeltified objects > (20%?) will be stuck at core.loosecompression [since I see no way > to sensibly re-apply compression to something copied pack-to-pack]. > > I think this is inconsistent with what a pack.compression != > core.loosecompression setting is telling us. OK I see that I missed the fact that git-repack -f (or git-pack-objects --no-reuse-delta) does not recompress undeltified objects. Note this is a problem in the case where you change pack.compression to a different value or override it on the command line as well: reused undeltified objects won't get recompressed with the new level. My rant on core.legacyheaders and its removal would address the first case. 
Your test for a difference between loose and packed compression levels is flawed because the value of core.compression does not necessarily represent the compression level that was used for the loose objects to pack (core.compression might have been modified since then), but it only addresses the first case too. And this is a problem even now. What we need instead is a --no-reuse-object that would force recompression of everything when you really want to enforce a specific compression level across the whole pack(s). Nicolas ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 15:27 ` Nicolas Pitre @ 2007-05-09 16:26 ` Junio C Hamano 2007-05-09 16:42 ` Dana How ` (2 more replies) 0 siblings, 3 replies; 23+ messages in thread From: Junio C Hamano @ 2007-05-09 16:26 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Dana How, Git Mailing List Nicolas Pitre <nico@cam.org> writes: > So I suggest that we get rid of core.legacyheaders, preserve the legacy > format as the only writable loose object format and deprecate the other > one to keep things simpler. Thoughts? I agree with your analysis, especially when deeper delta chains are allowed, straight copy of loose object becomes less and less likely. > What we need instead is a --no-reuse-object that would force > recompression of everything when you really want to enforce a specific > compression level across the whole pack(s). Yeah. Or maybe --no-reuse to mean both and make '-f' a short-hand synonym for that. I do not see much reason to want to tweak them independently; recomputing delta is much more expensive than recompressing anyway, and when the user says 'repack -f', it is a sign that the user is willing to spend CPU cycles. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 16:26 ` Junio C Hamano @ 2007-05-09 16:42 ` Dana How 2007-05-09 16:59 ` [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" Nicolas Pitre 2007-05-09 18:42 ` [PATCH] deprecate the new loose object header format Nicolas Pitre 2 siblings, 0 replies; 23+ messages in thread From: Dana How @ 2007-05-09 16:42 UTC (permalink / raw) To: Junio C Hamano; +Cc: Nicolas Pitre, Git Mailing List, danahow On 5/9/07, Junio C Hamano <junkio@cox.net> wrote: > Nicolas Pitre <nico@cam.org> writes: > > > So I suggest that we get rid of core.legacyheaders, preserve the legacy > > format as the only writable loose object format and deprecate the other > > one to keep things simpler. Thoughts? > > I agree with your analysis, especially when deeper delta chains > are allowed, straight copy of loose object becomes less and less > likely. I too agree with the logic in the original <tangential comment>, and deprecation is the appropriate thing. So I'm not actually going to change anything here in today's patch. > > What we need instead is a --no-reuse-object that would force > > recompression of everything when you really want to enforce a specific > > compression level across the whole pack(s). > > Yeah. Or maybe --no-reuse to mean both and make '-f' a > short-hand synonym for that. > > I do not see much reason to want to tweak them independently; > recomputing delta is much more expensive than recompressing > anyway, and when the user says 'repack -f', it is a sign that > the user is willing to spend CPU cycles. We have the same idea about git-repack -f -- it implies both --no-reuse-delta and --no-reuse-object , and I will do that. I will incorporate Nicolas's new --no-reuse-object in my patch to builtin-pack-objects.c . I won't go so far as collapsing --no-reuse-* to --no-reuse in this patch. Thanks, -- Dana L. 
How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" 2007-05-09 16:26 ` Junio C Hamano 2007-05-09 16:42 ` Dana How @ 2007-05-09 16:59 ` Nicolas Pitre 2007-05-09 18:42 ` [PATCH] deprecate the new loose object header format Nicolas Pitre 2 siblings, 0 replies; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 16:59 UTC (permalink / raw) To: Junio C Hamano; +Cc: Dana How, Git Mailing List As Junio said: Recomputing delta is much more expensive than recompressing anyway, and when the user says 'repack -f', it is a sign that the user is willing to spend CPU cycles. Signed-off-by: Nicolas Pitre <nico@cam.org> --- On Wed, 9 May 2007, Junio C Hamano wrote: > Nicolas Pitre <nico@cam.org> writes: > > > What we need instead is a --no-reuse-object that would force > > recompression of everything when you really want to enforce a specific > > compression level across the whole pack(s). > > Yeah. Or maybe --no-reuse to mean both and make '-f' a > short-hand synonym for that. > > I do not see much reason to want to tweak them independently; > recomputing delta is much more expensive than recompressing > anyway, and when the user says 'repack -f', it is a sign that > the user is willing to spend CPU cycles. This applies on top of my --no-reuse-object patch. I think it is good to have the flexibility in the plumbing, but as you say git-repack might as well use the strongest option. diff --git a/git-repack.sh b/git-repack.sh index ddfa8b4..8bf66a4 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -8,7 +8,7 @@ SUBDIRECTORY_OK='Yes' . 
git-sh-setup no_update_info= all_into_one= remove_redundant= -local= quiet= no_reuse_delta= extra= +local= quiet= no_reuse= extra= while case "$#" in 0) break ;; esac do case "$1" in @@ -16,7 +16,7 @@ do -a) all_into_one=t ;; -d) remove_redundant=t ;; -q) quiet=-q ;; - -f) no_reuse_delta=--no-reuse-delta ;; + -f) no_reuse=--no-reuse-object ;; -l) local=--local ;; --window=*) extra="$extra $1" ;; --depth=*) extra="$extra $1" ;; @@ -61,7 +61,7 @@ case ",$all_into_one," in ;; esac -args="$args $local $quiet $no_reuse_delta$extra" +args="$args $local $quiet $no_reuse$extra" name=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") || exit 1 if [ -z "$name" ]; then ^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] deprecate the new loose object header format 2007-05-09 16:26 ` Junio C Hamano 2007-05-09 16:42 ` Dana How 2007-05-09 16:59 ` [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" Nicolas Pitre @ 2007-05-09 18:42 ` Nicolas Pitre 2007-05-09 20:16 ` Dana How 2 siblings, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 18:42 UTC (permalink / raw) To: Junio C Hamano; +Cc: Dana How, Git Mailing List Now that we encourage and actively preserve objects in a packed form more agressively than we did at the time the new loose object format and core.legacyheaders were introduced, that extra loose object format doesn't appear to be worth it anymore. Because the packing of loose objects has to go through the delta match loop anyway, and since most of them should end up being deltified in most cases, there is really little advantage to have this parallel loose object format as the CPU savings it might provide is rather lost in the noise in the end. This patch gets rid of core.legacyheaders, preserve the legacy format as the only writable loose object format and deprecate the other one to keep things simpler. Signed-off-by: Nicolas Pitre <nico@cam.org> --- On Wed, 9 May 2007, Junio C Hamano wrote: > I agree with your analysis, especially when deeper delta chains > are allowed, straight copy of loose object becomes less and less > likely. So here it is, with a nice code reduction: Documentation/config.txt | 13 -------- builtin-pack-objects.c | 69 ---------------------------------------------- cache.h | 2 - config.c | 5 --- environment.c | 1 - sha1_file.c | 47 +++++++------------------------ 6 files changed, 11 insertions(+), 126 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ea434af..d6d89ba 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -209,19 +209,6 @@ core.compression:: compression, and 1..9 are various speed/size tradeoffs, 9 being slowest. 
-core.legacyheaders:: - A boolean which - changes the format of loose objects so that they are more - efficient to pack and to send out of the repository over git - native protocol, since v1.4.2. However, loose objects - written in the new format cannot be read by git older than - that version; people fetching from your repository using - older versions of git over dumb transports (e.g. http) - will also be affected. -+ -To let git use the new loose object format, you have to -set core.legacyheaders to false. - core.packedGitWindowSize:: Number of bytes of a pack file to map into memory in a single mapping operation. Larger window sizes may allow diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 966f843..c74a361 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -346,56 +346,6 @@ static void copy_pack_data(struct sha1file *f, } } -static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) -{ - z_stream stream; - unsigned char fakebuf[4096]; - int st; - - memset(&stream, 0, sizeof(stream)); - stream.next_in = data; - stream.avail_in = len; - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - inflateInit(&stream); - - while (1) { - st = inflate(&stream, Z_FINISH); - if (st == Z_STREAM_END || st == Z_OK) { - st = (stream.total_out == expect && - stream.total_in == len) ? 0 : -1; - break; - } - if (st != Z_BUF_ERROR) { - st = -1; - break; - } - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - } - inflateEnd(&stream); - return st; -} - -static int revalidate_loose_object(struct object_entry *entry, - unsigned char *map, - unsigned long mapsize) -{ - /* we already know this is a loose object with new type header. 
*/ - enum object_type type; - unsigned long size, used; - - if (pack_to_stdout) - return 0; - - used = unpack_object_header_gently(map, mapsize, &type, &size); - if (!used) - return -1; - map += used; - mapsize -= used; - return check_loose_inflate(map, mapsize, size); -} - static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { @@ -425,25 +375,6 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!entry->in_pack && !entry->delta) { - unsigned char *map; - unsigned long mapsize; - map = map_sha1_file(entry->sha1, &mapsize); - if (map && !legacy_loose_object(map)) { - /* We can copy straight into the pack file */ - if (revalidate_loose_object(entry, map, mapsize)) - die("corrupt loose object %s", - sha1_to_hex(entry->sha1)); - sha1write(f, map, mapsize); - munmap(map, mapsize); - written++; - reused++; - return mapsize; - } - if (map) - munmap(map, mapsize); - } - if (!to_reuse) { buf = read_sha1_file(entry->sha1, &type, &size); if (!buf) diff --git a/cache.h b/cache.h index 8e76152..5725bce 100644 --- a/cache.h +++ b/cache.h @@ -273,7 +273,6 @@ extern void rollback_lock_file(struct lock_file *); extern int delete_ref(const char *, const unsigned char *sha1); /* Environment bits from configuration mechanism */ -extern int use_legacy_headers; extern int trust_executable_bit; extern int has_symlinks; extern int assume_unchanged; @@ -354,7 +353,6 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename); extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); extern int has_sha1_file(const unsigned char *sha1); extern void *map_sha1_file(const unsigned char *sha1, unsigned long *); -extern int legacy_loose_object(unsigned char *); extern int has_pack_file(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); diff --git a/config.c b/config.c index 70d1055..298966f 100644 --- a/config.c +++ b/config.c @@ -299,11 +299,6 @@ int 
git_default_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.legacyheaders")) { - use_legacy_headers = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "core.compression")) { int level = git_config_int(var, value); if (level == -1) diff --git a/environment.c b/environment.c index 2231659..54e3aba 100644 --- a/environment.c +++ b/environment.c @@ -11,7 +11,6 @@ char git_default_email[MAX_GITNAME]; char git_default_name[MAX_GITNAME]; -int use_legacy_headers = 1; int trust_executable_bit = 1; int has_symlinks = 1; int assume_unchanged; diff --git a/sha1_file.c b/sha1_file.c index 32244d7..e715527 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -972,7 +972,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } -int legacy_loose_object(unsigned char *map) +static int legacy_loose_object(unsigned char *map) { unsigned int word; @@ -1034,6 +1034,14 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon return inflate(stream, 0); } + + /* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned up not to be + * really worth it and we don't write it any longer. But we + * can still read it. 
+ */ used = unpack_object_header_gently(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; @@ -1962,40 +1970,6 @@ static int write_buffer(int fd, const void *buf, size_t len) return 0; } -static int write_binary_header(unsigned char *hdr, enum object_type type, unsigned long len) -{ - int hdr_len; - unsigned char c; - - c = (type << 4) | (len & 15); - len >>= 4; - hdr_len = 1; - while (len) { - *hdr++ = c | 0x80; - hdr_len++; - c = (len & 0x7f); - len >>= 7; - } - *hdr = c; - return hdr_len; -} - -static void setup_object_header(z_stream *stream, const char *type, unsigned long len) -{ - int obj_type, hdrlen; - - if (use_legacy_headers) { - while (deflate(stream, 0) == Z_OK) - /* nothing */; - return; - } - obj_type = type_from_string(type); - hdrlen = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdrlen; - stream->next_out += hdrlen; - stream->avail_out -= hdrlen; -} - int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { @@ -2062,7 +2036,8 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha /* First header.. */ stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; - setup_object_header(&stream, type, len); + while (deflate(&stream, 0) == Z_OK) + /* nothing */; /* Then the data itself.. */ stream.next_in = buf; ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH] deprecate the new loose object header format 2007-05-09 18:42 ` [PATCH] deprecate the new loose object header format Nicolas Pitre @ 2007-05-09 20:16 ` Dana How 2007-05-09 20:42 ` Nicolas Pitre 0 siblings, 1 reply; 23+ messages in thread From: Dana How @ 2007-05-09 20:16 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow This doesn't just deprecate the format, it removes the ability to create it. So to me this patch goes too far. Also, if we're interested in "simpler", wouldn't it be better for loose and in-pack objects to be the same? I thought that was the point of !legacy_headers. Whatever the decision is, it certainly has little impact on CPU effort as you point out. Maybe I'm too conservative because I'm less famililar with the code and haven't written it. Personally I oscillate between adding and refactoring (you haven't seen any refactoring yet ;-) ). Thanks, Dana On 5/9/07, Nicolas Pitre <nico@cam.org> wrote: > Now that we encourage and actively preserve objects in a packed form > more agressively than we did at the time the new loose object format and > core.legacyheaders were introduced, that extra loose object format > doesn't appear to be worth it anymore. > > Because the packing of loose objects has to go through the delta match > loop anyway, and since most of them should end up being deltified in > most cases, there is really little advantage to have this parallel loose > object format as the CPU savings it might provide is rather lost in the > noise in the end. > > This patch gets rid of core.legacyheaders, preserve the legacy format as > the only writable loose object format and deprecate the other one to > keep things simpler. > > Signed-off-by: Nicolas Pitre <nico@cam.org> > --- > > On Wed, 9 May 2007, Junio C Hamano wrote: > > > I agree with your analysis, especially when deeper delta chains > > are allowed, straight copy of loose object becomes less and less > > likely. 
> > So here it is, with a nice code reduction: > > Documentation/config.txt | 13 -------- > builtin-pack-objects.c | 69 ---------------------------------------------- > cache.h | 2 - > config.c | 5 --- > environment.c | 1 - > sha1_file.c | 47 +++++++------------------------ > 6 files changed, 11 insertions(+), 126 deletions(-) > > diff --git a/Documentation/config.txt b/Documentation/config.txt > index ea434af..d6d89ba 100644 > --- a/Documentation/config.txt > +++ b/Documentation/config.txt > @@ -209,19 +209,6 @@ core.compression:: > compression, and 1..9 are various speed/size tradeoffs, 9 being > slowest. > > -core.legacyheaders:: > - A boolean which > - changes the format of loose objects so that they are more > - efficient to pack and to send out of the repository over git > - native protocol, since v1.4.2. However, loose objects > - written in the new format cannot be read by git older than > - that version; people fetching from your repository using > - older versions of git over dumb transports (e.g. http) > - will also be affected. > -+ > -To let git use the new loose object format, you have to > -set core.legacyheaders to false. > - > core.packedGitWindowSize:: > Number of bytes of a pack file to map into memory in a > single mapping operation. 
Larger window sizes may allow > diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c > index 966f843..c74a361 100644 > --- a/builtin-pack-objects.c > +++ b/builtin-pack-objects.c > @@ -346,56 +346,6 @@ static void copy_pack_data(struct sha1file *f, > } > } > > -static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) > -{ > - z_stream stream; > - unsigned char fakebuf[4096]; > - int st; > - > - memset(&stream, 0, sizeof(stream)); > - stream.next_in = data; > - stream.avail_in = len; > - stream.next_out = fakebuf; > - stream.avail_out = sizeof(fakebuf); > - inflateInit(&stream); > - > - while (1) { > - st = inflate(&stream, Z_FINISH); > - if (st == Z_STREAM_END || st == Z_OK) { > - st = (stream.total_out == expect && > - stream.total_in == len) ? 0 : -1; > - break; > - } > - if (st != Z_BUF_ERROR) { > - st = -1; > - break; > - } > - stream.next_out = fakebuf; > - stream.avail_out = sizeof(fakebuf); > - } > - inflateEnd(&stream); > - return st; > -} > - > -static int revalidate_loose_object(struct object_entry *entry, > - unsigned char *map, > - unsigned long mapsize) > -{ > - /* we already know this is a loose object with new type header. */ > - enum object_type type; > - unsigned long size, used; > - > - if (pack_to_stdout) > - return 0; > - > - used = unpack_object_header_gently(map, mapsize, &type, &size); > - if (!used) > - return -1; > - map += used; > - mapsize -= used; > - return check_loose_inflate(map, mapsize, size); > -} > - > static unsigned long write_object(struct sha1file *f, > struct object_entry *entry) > { > @@ -425,25 +375,6 @@ static unsigned long write_object(struct sha1file *f, > * and we do not need to deltify it. 
> */ > > - if (!entry->in_pack && !entry->delta) { > - unsigned char *map; > - unsigned long mapsize; > - map = map_sha1_file(entry->sha1, &mapsize); > - if (map && !legacy_loose_object(map)) { > - /* We can copy straight into the pack file */ > - if (revalidate_loose_object(entry, map, mapsize)) > - die("corrupt loose object %s", > - sha1_to_hex(entry->sha1)); > - sha1write(f, map, mapsize); > - munmap(map, mapsize); > - written++; > - reused++; > - return mapsize; > - } > - if (map) > - munmap(map, mapsize); > - } > - > if (!to_reuse) { > buf = read_sha1_file(entry->sha1, &type, &size); > if (!buf) > diff --git a/cache.h b/cache.h > index 8e76152..5725bce 100644 > --- a/cache.h > +++ b/cache.h > @@ -273,7 +273,6 @@ extern void rollback_lock_file(struct lock_file *); > extern int delete_ref(const char *, const unsigned char *sha1); > > /* Environment bits from configuration mechanism */ > -extern int use_legacy_headers; > extern int trust_executable_bit; > extern int has_symlinks; > extern int assume_unchanged; > @@ -354,7 +353,6 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename); > extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); > extern int has_sha1_file(const unsigned char *sha1); > extern void *map_sha1_file(const unsigned char *sha1, unsigned long *); > -extern int legacy_loose_object(unsigned char *); > > extern int has_pack_file(const unsigned char *sha1); > extern int has_pack_index(const unsigned char *sha1); > diff --git a/config.c b/config.c > index 70d1055..298966f 100644 > --- a/config.c > +++ b/config.c > @@ -299,11 +299,6 @@ int git_default_config(const char *var, const char *value) > return 0; > } > > - if (!strcmp(var, "core.legacyheaders")) { > - use_legacy_headers = git_config_bool(var, value); > - return 0; > - } > - > if (!strcmp(var, "core.compression")) { > int level = git_config_int(var, value); > if (level == -1) > diff --git a/environment.c b/environment.c > index 2231659..54e3aba 100644 > 
--- a/environment.c > +++ b/environment.c > @@ -11,7 +11,6 @@ > > char git_default_email[MAX_GITNAME]; > char git_default_name[MAX_GITNAME]; > -int use_legacy_headers = 1; > int trust_executable_bit = 1; > int has_symlinks = 1; > int assume_unchanged; > diff --git a/sha1_file.c b/sha1_file.c > index 32244d7..e715527 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > @@ -972,7 +972,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) > return map; > } > > -int legacy_loose_object(unsigned char *map) > +static int legacy_loose_object(unsigned char *map) > { > unsigned int word; > > @@ -1034,6 +1034,14 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon > return inflate(stream, 0); > } > > + > + /* > + * There used to be a second loose object header format which > + * was meant to mimic the in-pack format, allowing for direct > + * copy of the object data. This format turned up not to be > + * really worth it and we don't write it any longer. But we > + * can still read it. 
> + */ > used = unpack_object_header_gently(map, mapsize, &type, &size); > if (!used || !valid_loose_object_type[type]) > return -1; > @@ -1962,40 +1970,6 @@ static int write_buffer(int fd, const void *buf, size_t len) > return 0; > } > > -static int write_binary_header(unsigned char *hdr, enum object_type type, unsigned long len) > -{ > - int hdr_len; > - unsigned char c; > - > - c = (type << 4) | (len & 15); > - len >>= 4; > - hdr_len = 1; > - while (len) { > - *hdr++ = c | 0x80; > - hdr_len++; > - c = (len & 0x7f); > - len >>= 7; > - } > - *hdr = c; > - return hdr_len; > -} > - > -static void setup_object_header(z_stream *stream, const char *type, unsigned long len) > -{ > - int obj_type, hdrlen; > - > - if (use_legacy_headers) { > - while (deflate(stream, 0) == Z_OK) > - /* nothing */; > - return; > - } > - obj_type = type_from_string(type); > - hdrlen = write_binary_header(stream->next_out, obj_type, len); > - stream->total_out = hdrlen; > - stream->next_out += hdrlen; > - stream->avail_out -= hdrlen; > -} > - > int hash_sha1_file(const void *buf, unsigned long len, const char *type, > unsigned char *sha1) > { > @@ -2062,7 +2036,8 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha > /* First header.. */ > stream.next_in = (unsigned char *)hdr; > stream.avail_in = hdrlen; > - setup_object_header(&stream, type, len); > + while (deflate(&stream, 0) == Z_OK) > + /* nothing */; > > /* Then the data itself.. */ > stream.next_in = buf; > -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] deprecate the new loose object header format 2007-05-09 20:16 ` Dana How @ 2007-05-09 20:42 ` Nicolas Pitre 2007-05-09 21:00 ` Dana How 0 siblings, 1 reply; 23+ messages in thread From: Nicolas Pitre @ 2007-05-09 20:42 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List On Wed, 9 May 2007, Dana How wrote: > This doesn't just deprecate the format, > it removes the ability to create it. > So to me this patch goes too far. Well, I disagree. We cannot create it anymore, but we still can read it. Older Git versions may not even read it. And since this format was off by default anyway, I doubt you've lost anything. > Also, if we're interested in "simpler", wouldn't > it be better for loose and in-pack objects to be > the same? I thought that was the point of > !legacy_headers. It is not simpler because: 1) backward compatibility requires the legacy format, and 2) the object SHA1 is always computed with the legacy header included. So what is simpler is really to get rid of over 100 lines of code that didn't provide a real benefit. The faster we remove the ability to write such objects the fewer they'll be in the field. Nicolas ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH] deprecate the new loose object header format 2007-05-09 20:42 ` Nicolas Pitre @ 2007-05-09 21:00 ` Dana How 0 siblings, 0 replies; 23+ messages in thread From: Dana How @ 2007-05-09 21:00 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, Git Mailing List, danahow On 5/9/07, Nicolas Pitre <nico@cam.org> wrote: > It is not simpler because: > 2) the object SHA1 is always computed with the legacy header included. That convinces me. Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 0:25 ` Dana How 2007-05-09 1:23 ` Nicolas Pitre @ 2007-05-09 5:59 ` Junio C Hamano 2007-05-09 6:24 ` Dana How 1 sibling, 1 reply; 23+ messages in thread From: Junio C Hamano @ 2007-05-09 5:59 UTC (permalink / raw) To: Dana How; +Cc: Git Mailing List "Dana How" <danahow@gmail.com> writes: > On 5/8/07, Junio C Hamano <junkio@cox.net> wrote: >> Dana How <danahow@gmail.com> writes: >> ... >> > This applies on top of the git-repack --max-pack-size patchset. > >> Hmph, that makes the --max-pack-size patchset take this more >> trivial and straightforward improvements hostage. In general, >> I'd prefer more elaborate ones based on less questionable >> series. > > The max-pack-size and pack.compression patches touch the same lines. > I thought my options were: > * Submit independently and make you merge; or > * Make one precede the other. > Since max-pack-size has been out there since April 4 and > the first acceptable version was May 1 (suggested by 0 comments), > I didn't realize it was a "questionable series". No, what I meant was that it is much "more elaborate" series than this custom compression which is much "less questionable". I think this custom compression is 1.5.2 material. I have not studied the code for the max-pack-size enough to be confident to put it in 1.5.2, at least not yet, and was planning to park the latter in 'next' until 1.5.2 final. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 5:59 ` [PATCH v2] Custom compression levels for objects and packs Junio C Hamano @ 2007-05-09 6:24 ` Dana How 0 siblings, 0 replies; 23+ messages in thread From: Dana How @ 2007-05-09 6:24 UTC (permalink / raw) To: Junio C Hamano; +Cc: Git Mailing List, danahow, Nicolas Pitre On 5/8/07, Junio C Hamano <junkio@cox.net> wrote: > "Dana How" <danahow@gmail.com> writes: > > On 5/8/07, Junio C Hamano <junkio@cox.net> wrote: > >> Dana How <danahow@gmail.com> writes: > >> > This applies on top of the git-repack --max-pack-size patchset. > >> Hmph, that makes the --max-pack-size patchset take this more > >> trivial and straightforward improvements hostage. In general, > >> I'd prefer more elaborate ones based on less questionable > >> series. > > The max-pack-size and pack.compression patches touch the same lines. > > I thought my options were: > > * Submit independently and make you merge; or > > * Make one precede the other. > > Since max-pack-size has been out there since April 4 and > > the first acceptable version was May 1 (suggested by 0 comments), > > I didn't realize it was a "questionable series". > No, what I meant was that it is much "more elaborate" series > than this custom compression which is much "less questionable". > > I think this custom compression is 1.5.2 material. I have not > studied the code for the max-pack-size enough to be confident to > put it in 1.5.2, at least not yet, and was planning to park the > latter in 'next' until 1.5.2 final. OK, thanks for guesstimating the overall schedule. I was starting to wonder what the next step(s) should be. I will incorporate your & Nicolas's comments and send out a new custom compression patch tomorrow. I *think* I addressed everyone's comments on max-pack-size, but let me know if you find anything else when you get around to it. Thanks, -- Dana L. 
How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-08 22:38 [PATCH v2] Custom compression levels for objects and packs Dana How 2007-05-08 23:56 ` Junio C Hamano @ 2007-05-09 0:30 ` Petr Baudis 2007-05-09 13:56 ` Theodore Tso 2 siblings, 0 replies; 23+ messages in thread From: Petr Baudis @ 2007-05-09 0:30 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List On Wed, May 09, 2007 at 12:38:22AM CEST, Dana How wrote: > diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c > index 8824793..e80a1d6 100644 > --- a/builtin-pack-objects.c > +++ b/builtin-pack-objects.c > @@ -444,6 +446,10 @@ static unsigned long write_object(struct sha1file *f, > * and we do not need to deltify it. > */ > > + /* differing core & pack compression when loose object -> must recompress */ > + if (!entry->in_pack && pack_compression_level != zlib_compression_level) > + to_reuse = 0; > + else > if (!entry->in_pack && !entry->delta) { Style: the else and if should be probably on the same line. > unsigned char *map; > unsigned long mapsize; > @@ -1624,6 +1630,16 @@ static int git_pack_config(const char *k, const char *v) > window = git_config_int(k, v); > return 0; > } > + if (!strcmp(k, "pack.compression")) { > + int level = git_config_int(k, v); > + if (level == -1) > + level = Z_DEFAULT_COMPRESSION; > + else if (level < 0 || level > Z_BEST_COMPRESSION) > + die("bad pack compression level %d", level); > + pack_compression_level = level; > + pack_compression_seen = 1; > + return 0; > + } > return git_default_config(k, v); > } > Where is this documented? 
> @@ -1761,6 +1779,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) > usage(pack_usage); > continue; > } > + if (!prefixcmp(arg, "--compression=")) { > + char *end; > + int level = strtoul(arg+14, &end, 0); > + if (!arg[14] || *end) > + usage(pack_usage); > + if (level == -1) > + level = Z_DEFAULT_COMPRESSION; > + else if (level < 0 || level > Z_BEST_COMPRESSION) > + die("bad pack compression level %d", level); > + pack_compression_level = level; > + continue; > + } > if (!prefixcmp(arg, "--window=")) { > char *end; > window = strtoul(arg+9, &end, 0); Where is this documented? > diff --git a/config.c b/config.c > index 70d1055..5627ed6 100644 > --- a/config.c > +++ b/config.c > @@ -304,13 +306,27 @@ int git_default_config(const char *var, const char *value) > return 0; > } > > - if (!strcmp(var, "core.compression")) { > + if (!strcmp(var, "core.loosecompression")) { Is this config variable documented? -- Petr "Pasky" Baudis Stuff: http://pasky.or.cz/ Ever try. Ever fail. No matter. // Try again. Fail again. Fail better. -- Samuel Beckett ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-08 22:38 [PATCH v2] Custom compression levels for objects and packs Dana How 2007-05-08 23:56 ` Junio C Hamano 2007-05-09 0:30 ` Petr Baudis @ 2007-05-09 13:56 ` Theodore Tso 2007-05-09 16:44 ` Dana How 2 siblings, 1 reply; 23+ messages in thread From: Theodore Tso @ 2007-05-09 13:56 UTC (permalink / raw) To: Dana How; +Cc: Junio C Hamano, Git Mailing List I noticed that the patch didn't include additions to Documentation/config.txt; could those be added, so that as much as possible all of the various configuration knobs are documented in one place, please? Thanks!! - Ted ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] Custom compression levels for objects and packs 2007-05-09 13:56 ` Theodore Tso @ 2007-05-09 16:44 ` Dana How 0 siblings, 0 replies; 23+ messages in thread From: Dana How @ 2007-05-09 16:44 UTC (permalink / raw) To: Theodore Tso; +Cc: Junio C Hamano, Git Mailing List, danahow On 5/9/07, Theodore Tso <tytso@mit.edu> wrote: > I noticed that the patch didn't include additions to > Documentation/config.txt; could those be added, so that as much as > possible all of the various configuration knobs are documented in one > place, please? I will update the documentation, after we've stopped changing the set of options and behaviors. You can see that's just what I did in the max-pack-size patch. Thanks, -- Dana L. How danahow@gmail.com +1 650 804 5991 cell ^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2007-05-09 21:01 UTC | newest] Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2007-05-08 22:38 [PATCH v2] Custom compression levels for objects and packs Dana How 2007-05-08 23:56 ` Junio C Hamano 2007-05-09 0:16 ` Nicolas Pitre 2007-05-09 0:29 ` Dana How 2007-05-09 1:03 ` Nicolas Pitre 2007-05-09 6:46 ` Dana How 2007-05-09 7:13 ` Junio C Hamano 2007-05-09 0:25 ` Dana How 2007-05-09 1:23 ` Nicolas Pitre 2007-05-09 9:21 ` Dana How 2007-05-09 15:27 ` Nicolas Pitre 2007-05-09 16:26 ` Junio C Hamano 2007-05-09 16:42 ` Dana How 2007-05-09 16:59 ` [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" Nicolas Pitre 2007-05-09 18:42 ` [PATCH] deprecate the new loose object header format Nicolas Pitre 2007-05-09 20:16 ` Dana How 2007-05-09 20:42 ` Nicolas Pitre 2007-05-09 21:00 ` Dana How 2007-05-09 5:59 ` [PATCH v2] Custom compression levels for objects and packs Junio C Hamano 2007-05-09 6:24 ` Dana How 2007-05-09 0:30 ` Petr Baudis 2007-05-09 13:56 ` Theodore Tso 2007-05-09 16:44 ` Dana How
Code repositories for project(s) associated with this public inbox https://80x24.org/mirrors/git.git This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).