* [PATCH 0/2] index: start speeding up some noop calls
@ 2020-12-24 10:09 Eric Wong
2020-12-24 10:09 ` [PATCH 1/2] inboxwritable: delay umask_prepare calls Eric Wong
2020-12-24 10:09 ` [PATCH 2/2] index: support --fast-noop / -F switch Eric Wong
0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2020-12-24 10:09 UTC (permalink / raw)
To: meta
Users scripting "public-inbox-index --all" to run after grok-pull
runs have to wait a long time with thousands of inboxes, most
of which don't get updated.
PATCH 1/2 is a no-brainer and improves the opt-in speedup
for PATCH 2/2.
2/2 I'm not 100% sure about. Maybe -F/--fast-noop can become a
default, maybe not. -L medium/full users will notice it the
most, but there are further opportunities for speedups there.
Eric Wong (2):
inboxwritable: delay umask_prepare calls
index: support --fast-noop / -F switch
lib/PublicInbox/ExtSearchIdx.pm | 2 --
lib/PublicInbox/InboxWritable.pm | 6 ++----
lib/PublicInbox/SearchIdx.pm | 1 -
lib/PublicInbox/V2Writable.pm | 17 +++++++++++------
lib/PublicInbox/Xapcmd.pm | 1 -
script/public-inbox-convert | 1 -
script/public-inbox-index | 2 +-
7 files changed, 14 insertions(+), 16 deletions(-)
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1/2] inboxwritable: delay umask_prepare calls
2020-12-24 10:09 [PATCH 0/2] index: start speeding up some noop calls Eric Wong
@ 2020-12-24 10:09 ` Eric Wong
2020-12-24 10:09 ` [PATCH 2/2] index: support --fast-noop / -F switch Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-12-24 10:09 UTC (permalink / raw)
To: meta
This simplifies all ->with_umask callers and opens the
door for further optimizations to delay/elide process spawning.
---
lib/PublicInbox/ExtSearchIdx.pm | 2 --
lib/PublicInbox/InboxWritable.pm | 6 ++----
lib/PublicInbox/SearchIdx.pm | 1 -
lib/PublicInbox/V2Writable.pm | 3 ---
lib/PublicInbox/Xapcmd.pm | 1 -
script/public-inbox-convert | 1 -
6 files changed, 2 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index fb627089..c43a6c5e 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -934,7 +934,6 @@ sub idx_init { # similar to V2Writable
PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o);
}
$self->parallel_init($self->{indexlevel});
- $self->umask_prepare;
$self->with_umask(\&_idx_init, $self, $opt);
$self->{oidx}->begin_lazy;
$self->{oidx}->eidx_prep;
@@ -943,7 +942,6 @@ sub idx_init { # similar to V2Writable
no warnings 'once';
*done = \&PublicInbox::V2Writable::done;
-*umask_prepare = \&PublicInbox::InboxWritable::umask_prepare;
*with_umask = \&PublicInbox::InboxWritable::with_umask;
*parallel_init = \&PublicInbox::V2Writable::parallel_init;
*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 69275bb0..31eb3f15 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -65,7 +65,6 @@ sub init_inbox {
if ($self->version == 1) {
my $dir = assert_usable_dir($self);
PublicInbox::Import::init_bare($dir);
- $self->umask_prepare;
$self->with_umask(\&_init_v1, $self, $skip_artnum);
} else {
my $v2w = importer($self);
@@ -260,7 +259,7 @@ sub _umask_for {
sub with_umask {
my ($self, $cb, @arg) = @_;
- my $old = umask $self->{umask};
+ my $old = umask($self->{umask} //= umask_prepare($self));
my $rv = eval { $cb->(@arg) };
my $err = $@;
umask $old;
@@ -271,8 +270,7 @@ sub with_umask {
sub umask_prepare {
my ($self) = @_;
my $perm = _git_config_perm($self);
- my $umask = _umask_for($perm);
- $self->{umask} = $umask;
+ _umask_for($perm);
}
sub cleanup ($) {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index d1b0c724..c8e309fc 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -67,7 +67,6 @@ sub new {
$self->{-set_skip_docdata_once} = 1;
$self->{-skip_docdata} = 1;
}
- $ibx->umask_prepare;
if ($version == 1) {
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 3e3b275f..531a72b2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -97,8 +97,6 @@ sub new {
die "$dir does not exist\n";
}
}
- $v2ibx->umask_prepare;
-
my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
my $self = {
ibx => $v2ibx,
@@ -320,7 +318,6 @@ sub idx_init {
$ibx->git->cleanup;
parallel_init($self, $ibx->{indexlevel});
- $ibx->umask_prepare;
$ibx->with_umask(\&_idx_init, $self, $opt);
}
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4f77ef25..ca2345f7 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -270,7 +270,6 @@ sub run {
local %SIG = %SIG;
setup_signals();
- $ibx->umask_prepare;
$ibx->with_umask(\&_run, $ibx, $cb, $opt);
}
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 800c364c..e6ee6529 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -80,7 +80,6 @@ $new->{version} = 2;
$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
$new->{-no_fsync} = 1 if !$opt->{fsync};
my $v2w;
-$old->umask_prepare;
sub link_or_copy ($$) {
my ($src, $dst) = @_;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] index: support --fast-noop / -F switch
2020-12-24 10:09 [PATCH 0/2] index: start speeding up some noop calls Eric Wong
2020-12-24 10:09 ` [PATCH 1/2] inboxwritable: delay umask_prepare calls Eric Wong
@ 2020-12-24 10:09 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-12-24 10:09 UTC (permalink / raw)
To: meta
Note: I'm not sure if it's worth documenting and supporting this
long-term.
We can avoid taking locks for invocations of "index --all"
and rely on high-resolution ctime (struct timespec st_ctim)
comparisons of msgmap.sqlite3 and the packed-refs + refs/heads
directory of the newest epoch.
This cuts public-inbox-index invocations with
"--all --no-update-extindex -L basic" down from 0.92s to 0.31s.
The change with "-L medium" or "-L full" and (default) non-zero
jobs is even more drastic, reducing a 12-13s no-op invocation
down to the same 0.31s.
---
lib/PublicInbox/V2Writable.pm | 14 +++++++++++---
script/public-inbox-index | 2 +-
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 531a72b2..2b849ddf 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1351,11 +1351,19 @@ sub index_sync {
$opt //= {};
return xapian_only($self, $opt) if $opt->{xapian_only};
- my $pr = $opt->{-progress};
my $epoch_max;
- my $latest = $self->{ibx}->git_dir_latest(\$epoch_max);
- return unless defined $latest;
+ my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+ if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
+ use Time::HiRes qw(stat);
+ if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {
+ my $c = $mm[10]; # 10 = ctime (nsec NV)
+ my @hd = stat("$latest/refs/heads");
+ my @pr = stat("$latest/packed-refs");
+ return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0);
+ }
+ }
+ my $pr = $opt->{-progress};
my $seq = $opt->{sequential_shard};
my $art_beg; # the NNTP article number we start xapian_only at
my $idxlevel = $self->{ibx}->{indexlevel};
diff --git a/script/public-inbox-index b/script/public-inbox-index
index f10bb5ad..91afac88 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -42,7 +42,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
batch_size|batch-size=s
sequential_shard|seq-shard|sequential-shard
no-update-extindex update-extindex|E=s@
- skip-docdata all help|h))
+ fast-noop|F skip-docdata all help|h))
or die $help;
if ($opt->{help}) { print $help; exit 0 };
die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-12-24 10:09 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-24 10:09 [PATCH 0/2] index: start speeding up some noop calls Eric Wong
2020-12-24 10:09 ` [PATCH 1/2] inboxwritable: delay umask_prepare calls Eric Wong
2020-12-24 10:09 ` [PATCH 2/2] index: support --fast-noop / -F switch Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).