1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!perl -w
# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Basic tool to create a Xapian search index for a public-inbox.
# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
# highly recommended: eatmydata public-inbox-index INBOX_DIR
use strict;
use v5.10.1;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
# Usage summary for this command.  It is printed to stdout for --help,
# passed to die() when option parsing fails, and printed to stderr when
# no inboxes are left to work on.
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
usage: public-inbox-index [options] INBOX_DIR
Create and update per-inbox search indices
options:
--no-fsync speed up indexing, risk corruption on power outage
-L LEVEL `basic', `medium', or `full' (default: full)
-E EXTINDEX update extindex (default: `all')
--all index all configured inboxes
--compact | -c run public-inbox-compact(1) after indexing
--sequential-shard index Xapian shards sequentially for slow storage
--jobs=NUM set or disable parallelization (NUM=0)
--batch-size=BYTES flush changes to OS after a given number of bytes
--max-size=BYTES do not index messages larger than the given size
--reindex index previously indexed data (if upgrading)
--since=DATE limit --reindex to changes after DATE
--until=DATE limit --reindex to changes before DATE
--rethread regenerate thread IDs (if upgrading, use sparingly)
--prune prune git storage on discontiguous history
--verbose | -v increase verbosity (may be repeated)
BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-index(1) man page for full documentation.
EOF
# Defaults that have to be in place before the command line is parsed.
my $opt = {
	quiet => -1,
	compact => 0,
	max_size => undef,
	fsync => 1,
	# ":s@" optional arg sets '' if no arg given
	'update-extindex' => [],
};

# Getopt::Long option specs; parsed values land in %$opt.
my @getopt_spec = qw(
	verbose|v+ reindex rethread compact|c+ jobs|j=i prune
	fsync|sync! xapian_only|xapian-only dangerous
	indexlevel|index-level|L=s max_size|max-size=s
	batch_size|batch-size=s
	since|after=s until|before=s
	sequential-shard|seq-shard
	multi-pack-index!
	no-update-extindex update-extindex|E=s@
	fast-noop|F skip-docdata all C=s@ help|h
);
unless (GetOptions($opt, @getopt_spec)) {
	die $help;
}
if ($opt->{help}) {
	print $help;
	exit 0;
}

# Reject option combinations that cannot work before loading anything heavy.
if (defined($opt->{jobs}) && $opt->{jobs} < 0) {
	die "--jobs must be >= 0\n";
}
die "--xapian-only requires --reindex\n"
	if $opt->{xapian_only} && !$opt->{reindex};

# With --reindex, a given --fast-noop is removed from %$opt and reported.
warn "--fast-noop ignored with --reindex\n"
	if $opt->{reindex} && delete($opt->{'fast-noop'});
# Heavy modules are only required from here on, so that --help and option
# errors stay fast.
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-index');

# -C arguments are consumed here and removed from %$opt.
PublicInbox::Admin::do_chdir(delete $opt->{C});

# PublicInbox::Config is loaded by Admin, hence no explicit require.
my $cfg = PublicInbox::Config->new;
$opt->{-use_cwd} = 1;
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);

# NOTE(review): identical to the call a few lines up; looks redundant --
# confirm before dropping it.
PublicInbox::Admin::require_or_die('-index');

# Nothing to index: show usage on stderr and fail.
if (!@ibxs) {
	print STDERR $help;
	exit 1;
}
require PublicInbox::InboxWritable;
# @eidx collects one ExtSearchIdx object per distinct top-level directory;
# %eidx_seen remembers which directories already have one.
my (@eidx, %eidx_seen);
my $update_extindex = $opt->{'update-extindex'};

# No -E given: fall back to the configured "all" extindex, if any.
unless (@$update_extindex) {
	my $all_ei = $cfg->ALL;
	# extindex and normal inboxes may have different owners
	if ($all_ei && -w $all_ei->{topdir}) {
		push @$update_extindex, 'all';
	}
}
if ($opt->{'no-update-extindex'}) {
	@$update_extindex = ();
}
if (@$update_extindex) {
	PublicInbox::Admin::require_or_die('-search');
	require PublicInbox::ExtSearchIdx;
}

foreach my $ei_name (@$update_extindex) {
	# allow dirname or config section name; a configured name wins
	my $configured = $cfg->lookup_ei($ei_name);
	my $ei_dir;
	if ($configured) {
		$ei_dir = $configured->{topdir};
	} elsif (-d $ei_name) {
		$ei_dir = $ei_name;
	} else {
		die "extindex `$ei_name' not configured or found\n";
	}

	# Each extindex gets its own shallow copy of the options, with a
	# `basic' index level removed from that copy.
	my $ei_opt = { %$opt };
	if (defined($ei_opt->{indexlevel}) && $ei_opt->{indexlevel} eq 'basic') {
		delete $ei_opt->{indexlevel};
	}

	# Only build one object per directory; the stored value is the
	# size of @eidx right after the push.
	next if defined $eidx_seen{$ei_dir};
	$eidx_seen{$ei_dir} =
		push(@eidx, PublicInbox::ExtSearchIdx->new($ei_dir, $ei_opt));
}
# Upgrade every resolved inbox to a PublicInbox::InboxWritable, settle its
# index level and let Admin record the modules it needs in %$mods.
# Unconfigured inboxes are collected so that a single warning can be
# emitted when an extindex update was requested.
my $mods = {};
my @eidx_unconfigured;
foreach my $ibx (@ibxs) {
	# $ibx aliases the @ibxs element, so the array is updated in place
	$ibx = PublicInbox::InboxWritable->new($ibx);
	# detect_indexlevel may also set $ibx->{-skip_docdata}
	my $detected = $ibx->detect_indexlevel;
	# XXX: users can shoot themselves in the foot, with opt->{indexlevel}
	$ibx->{indexlevel} //= $opt->{indexlevel} //
				($opt->{xapian_only} ? 'full' : $detected);
	PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
	if (@eidx && $ibx->{-unconfigured}) {
		push @eidx_unconfigured, " $ibx->{inboxdir}\n";
	}
}
if (@eidx_unconfigured) {
	# Join explicitly instead of interpolating the arrays: interpolation
	# inserts $" (a space) between elements, which would give every
	# directory after the first an extra leading space because each
	# entry already ends with a newline.
	my $ei_names = join(' ', @$update_extindex);
	my $dirs = join('', @eidx_unconfigured);
	warn <<EOF;
The following inboxes are unconfigured and will not be updated in
$ei_names:
$dirs
EOF
}
# Compaction is switched off unless 'Xapian' is flagged in %$mods.
unless ($mods->{'Xapian'}) { # (or old Search::Xapian)
	$opt->{compact} = 0;
}
PublicInbox::Admin::require_or_die(keys %$mods);

# index_prepare may hand back environment overrides for the rest of the run.
my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
# This has to remain a statement modifier: putting `local' inside a block
# would undo the %ENV override as soon as that block is left.
local %ENV = (%ENV, %$env) if $env;

if ($opt->{compact}) {
	PublicInbox::Xapcmd::check_compact();
}
PublicInbox::Admin::progress_prepare($opt);
# Index each inbox in turn, compacting before and/or after as requested,
# then attach configured inboxes that reported indexing work to every
# extindex.  The loop stops early once `quit' shows up in the options.
foreach my $ibx (@ibxs) {
	# compact requested two or more times: also compact before indexing
	if ($opt->{compact} >= 2) {
		PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt});
	}

	# Carry command-line switches over to the inbox object.
	unless ($opt->{fsync}) {
		$ibx->{-no_fsync} = 1;
	}
	if ($opt->{dangerous}) {
		$ibx->{-dangerous} = 1;
	}
	# a value already present on the inbox takes precedence
	$ibx->{-skip_docdata} = $opt->{'skip-docdata'}
		unless defined $ibx->{-skip_docdata};

	# A per-inbox indexSequentialShard setting overrides the command
	# line through a private copy of the options.
	my $ibx_opt = $opt;
	my $seq_raw = $ibx->{lc('indexSequentialShard')};
	if (defined $seq_raw) {
		my $seq_bool = $cfg->git_bool($seq_raw);
		unless (defined $seq_bool) {
			die <<EOL;
publicInbox.$ibx->{name}.indexSequentialShard not boolean
EOL
		}
		$ibx_opt = { %$opt, 'sequential-shard' => $seq_bool };
	}

	my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
	last if $ibx_opt->{quit};

	my $copt = $opt->{compact_opt};
	if ($copt) {
		# jobs is forced to 0 only until this block is left
		local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'};
		PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
	}
	last if $ibx_opt->{quit};

	if (!$ibx->{-unconfigured} && $nidx) {
		foreach my $ei (@eidx) {
			$ei->attach_inbox($ibx);
		}
	}
}
# Finally bring every selected extindex up to date, announcing each one
# through the progress callback when there is one, and stop as soon as
# `quit' is set in the options.
my $progress = $opt->{-progress};
foreach my $eidx (@eidx) {
	if ($progress) {
		$progress->("indexing $eidx->{topdir} ...\n");
	}
	$eidx->eidx_sync($opt);
	if ($opt->{quit}) {
		last;
	}
}
|