From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 661311F4B8 for ; Sun, 28 Apr 2024 21:15:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1714338924; bh=LgL04b/gDdqeDm3mVhncd/NZlQmFwmJBDoCy/n5MB0A=; h=From:To:Subject:Date:From; b=pjf47VaIm6UCAIzevrchckoT2CYuJRcLcfGTJ6HWzQwF6Ega2QBWcbjbCjioMWYDB m4tkxAqrGFMH0wRDbKLp/qwsCd3O7PrjBlWeY7FW4NE8SF37gk2yxnzTEUykXNxPu1 uP/bcdEZTgEPkvlDuofVHibpw7nqy3wZSXMXLnsI= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] extindex: support --no-multi-pack-index Date: Sun, 28 Apr 2024 21:15:24 +0000 Message-ID: <20240428211524.2006976-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: git multi-pack-index files were creating swap storms and OOM-ing on my system; so providing an option to disable it seems prudent given the minor startup time regression. --- Documentation/public-inbox-extindex.pod | 13 +++++++++++++ Documentation/public-inbox-index.pod | 7 +++++++ lib/PublicInbox/ExtSearchIdx.pm | 2 +- script/public-inbox-extindex | 2 +- script/public-inbox-index | 1 + t/extsearch.t | 9 +++++++++ 6 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod index b53e45ed..2db7d7e9 100644 --- a/Documentation/public-inbox-extindex.pod +++ b/Documentation/public-inbox-extindex.pod @@ -80,6 +80,19 @@ doubles the size of the already-large Xapian database. Used with C<--reindex>, it will only look for new and stale entries and not touch already-indexed messages. 
+=item --no-multi-pack-index + +Disable writing a L<git-multi-pack-index(1)> file to save memory. +Normally, enabling multi-pack-index speeds up startup time of +subsequent L<git(1)> processes by 3-4%, but generating +this file requires several GB of memory with large repos. + +Unlike the C<core.multiPackIndex> directive in git, it's still +possible to read existing multi-pack-index files if they are +created elsewhere. + +Available in public-inbox 2.0.0+ + =back =head1 FILES diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 14f157a5..f1a2180a 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -192,6 +192,13 @@ external indices are configured. Do not update the C<all> external index by default. This negates all uses of C<-E> / C<--update-extindex=> on the command-line. +=item --no-multi-pack-index + +Disables writing the multi-pack-index when using L<public-inbox-extindex(1)>. +See L<public-inbox-extindex(1)/--no-multi-pack-index> for details. + +Available in public-inbox 2.0.0+ + =item --since=DATESTRING =item --after=DATESTRING diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 763a124c..774fa47b 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -1287,7 +1287,7 @@ sub idx_init { # similar to V2Writable ($has_new || $prune_nr || $new ne '') and $self->{mg}->write_alternates($mode, $alt, $new); my $restore = $self->with_umask; - if ($git_midx) { + if ($git_midx && ($opt->{'multi-pack-index'} // 1)) { my @cmd = ('multi-pack-index'); push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1; my $lk = $self->lock_for_scope; diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index bee824b1..2e5a5d2c 100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -32,7 +32,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s dedupe:s@ gc commit-interval=i watch scan! dry-run|n - all C=s@ help|h)) + multi-pack-index! 
all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; diff --git a/script/public-inbox-index b/script/public-inbox-index index 74232ebf..a13e44bf 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -44,6 +44,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune batch_size|batch-size=s since|after=s until|before=s sequential-shard|seq-shard + multi-pack-index! no-update-extindex update-extindex|E=s@ fast-noop|F skip-docdata all C=s@ help|h)) or die $help; diff --git a/t/extsearch.t b/t/extsearch.t index 090f6db5..797aa8f5 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -559,6 +559,15 @@ EOM for (@xdb) { ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard') } + my $mpi = "$d/ALL.git/objects/pack/multi-pack-index"; + SKIP: { + skip 'git too old for for multi-pack-index', 2 if !-f $mpi; + unlink glob("$d/ALL.git/objects/pack/*"); + ok run_script([qw(-extindex --all -L medium -j3 + --no-multi-pack-index), $d]), + 'test --no-multi-pack-index'; + ok !-f $mpi, '--no-multi-pack-index respected'; + } } test_lei(sub {