user/dev discussion of public-inbox itself
 help / color / mirror / Atom feed
c68f922447293bb0b43782e7795d57e9c7387ba0 blob 5594 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
 
#!perl -w
# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Basic tool to create a Xapian search index for a public-inbox.
# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
# highly recommended: eatmydata public-inbox-index INBOX_DIR

use strict;
use v5.10.1;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
usage: public-inbox-index [options] INBOX_DIR

  Create and update per-inbox search indices

options:

  --no-fsync          speed up indexing, risk corruption on power outage
  -L LEVEL            `basic', `medium', or `full' (default: full)
  -E EXTINDEX         update extindex (default: `all')
  --all               index all configured inboxes
  --compact | -c      run public-inbox-compact(1) after indexing
  --sequential-shard  index Xapian shards sequentially for slow storage
  --jobs=NUM          set or disable parallelization (NUM=0)
  --batch-size=BYTES  flush changes to OS after a given number of bytes
  --max-size=BYTES    do not index messages larger than the given size
  --reindex           index previously indexed data (if upgrading)
  --rethread          regenerate thread IDs (if upgrading, use sparingly)
  --prune             prune git storage on discontiguous history
  --verbose | -v      increase verbosity (may be repeated)

BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-index(1) man page for full documentation.
EOF

# Option defaults; GetOptions() below stores parsed switches into this hash.
my $opt = {
	quiet => -1,
	compact => 0,
	max_size => undef,
	fsync => 1,
	# pre-seeded with an array ref so that later code can always
	# dereference it, even when -E/--update-extindex is never given
	'update-extindex' => [],
};

# Getopt::Long specifications, one switch per line
my @getopt_spec = qw(
	verbose|v+
	reindex
	rethread
	compact|c+
	jobs|j=i
	prune
	fsync|sync!
	xapian_only|xapian-only
	indexlevel|index-level|L=s
	max_size|max-size=s
	batch_size|batch-size=s
	sequential_shard|seq-shard|sequential-shard
	no-update-extindex
	update-extindex|E=s@
	fast-noop|F
	skip-docdata
	all
	help|h
);
GetOptions($opt, @getopt_spec) or die $help;

# --help goes to stdout and is a successful exit
if ($opt->{help}) {
	print $help;
	exit 0;
}

# sanity checks on option combinations
if (defined($opt->{jobs}) && $opt->{jobs} < 0) {
	die "--jobs must be >= 0\n";
}
die "--xapian-only requires --reindex\n"
	if $opt->{xapian_only} && !$opt->{reindex};

# --fast-noop is dropped from $opt (with a warning) whenever --reindex is on
warn "--fast-noop ignored with --reindex\n"
	if $opt->{reindex} && delete($opt->{'fast-noop'});

# require lazily to speed up --help
require PublicInbox::Admin;

# Load the indexing prerequisites once, before touching the config, so a
# missing dependency is reported as early as possible.  (This used to be
# called a second time after resolve_inboxes() with identical arguments.)
PublicInbox::Admin::require_or_die('-index');

my $cfg = PublicInbox::Config->new; # Config is loaded by Admin

# NOTE(review): -use_cwd is consumed by resolve_inboxes(); presumably it
# allows falling back to the current directory when no INBOX_DIR is
# given -- confirm against PublicInbox::Admin.
$opt->{-use_cwd} = 1;
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);

# nothing to index: show usage on stderr and fail
unless (@ibxs) { print STDERR $help; exit 1 }

# @eidx: one PublicInbox::ExtSearchIdx object per distinct extindex topdir
# %eidx_seen: topdir => position in @eidx (1-based), used for de-duplication
my (@eidx, %eidx_seen);
my $update_extindex = $opt->{'update-extindex'};

# No -E given: fall back to the configured "all" extindex, if there is one
unless (@$update_extindex) {
	my $ALL = $cfg->ALL;
	# extindex and normal inboxes may have different owners
	if ($ALL && -w $ALL->{topdir}) {
		push @$update_extindex, 'all';
	}
}

# --no-update-extindex wins over both -E and the implicit default
if ($opt->{'no-update-extindex'}) {
	@$update_extindex = ();
}

# only pull in the extindex code when there is something to update
if (@$update_extindex) {
	PublicInbox::Admin::require_or_die('-search');
	require PublicInbox::ExtSearchIdx;
}

foreach my $name (@$update_extindex) {
	my $topdir;
	# allow dirname or config section name
	if (my $es = $cfg->lookup_ei($name)) {
		$topdir = $es->{topdir};
	} elsif (-d $name) {
		$topdir = $name;
	} else {
		die "extindex `$name' not configured or found\n";
	}

	# each topdir gets exactly one ExtSearchIdx object
	next if defined $eidx_seen{$topdir};
	push @eidx, PublicInbox::ExtSearchIdx->new($topdir, $opt);
	$eidx_seen{$topdir} = scalar @eidx;
}
# $mods is filled in by scan_ibx_modules() for every inbox and is later
# handed to require_or_die(); @eidx_unconfigured collects one indented,
# newline-terminated line per unconfigured inbox for the warning below.
my $mods = {};
my @eidx_unconfigured;
foreach my $ibx (@ibxs) {
	# detect_indexlevel may also set $ibx->{-skip_docdata}
	my $detected = PublicInbox::Admin::detect_indexlevel($ibx);
	# XXX: users can shoot themselves in the foot, with opt->{indexlevel}
	# Precedence: level already on the inbox object, then --indexlevel,
	# then `full' for --xapian-only, then whatever was detected on disk.
	$ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
			'full' : $detected);
	PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
	if (@eidx && $ibx->{-unconfigured}) {
		push @eidx_unconfigured, "  $ibx->{inboxdir}\n";
	}
}
if (@eidx_unconfigured) {
	# Every entry already carries its own indent and trailing newline, so
	# concatenate them directly: interpolating the array itself would
	# insert $" (a space) between entries and misalign every path after
	# the first.
	my $unconfigured = join('', @eidx_unconfigured);
	warn <<EOF;
The following inboxes are unconfigured and will not be updated in
@$update_extindex:
$unconfigured
EOF
}

# "Search::Xapian" includes SWIG "Xapian", too:
# Compaction is switched off when $mods (filled in by scan_ibx_modules()
# above) has no true entry for Search::Xapian.
$opt->{compact} = 0 if !$mods->{'Search::Xapian'};

# Load every module name collected in $mods.
PublicInbox::Admin::require_or_die(keys %$mods);
# index_prepare() may return a hash ref of environment overrides; a false
# return value means there is nothing to override.
my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
# `local' at file scope keeps the merged environment in effect for the rest
# of the script.  Keep the statement-modifier form: wrapping this in an
# if-block would undo the override at the block's closing brace.
local %ENV = (%ENV, %$env) if $env;
require PublicInbox::InboxWritable;
# NOTE(review): PublicInbox::Xapcmd is never require'd in this file;
# presumably index_prepare() or require_or_die() loads it when compaction
# is requested -- confirm.
PublicInbox::Xapcmd::check_compact() if $opt->{compact};
# Called for its effect on $opt; $opt->{-progress} is read after the main
# indexing loop.
PublicInbox::Admin::progress_prepare($opt);
# Index every resolved inbox in turn, optionally compacting before and/or
# after, and attach successfully indexed, configured inboxes to each extindex.
foreach my $ibx (@ibxs) {
	# assigning through the loop alias also replaces the entry in @ibxs
	$ibx = PublicInbox::InboxWritable->new($ibx);

	# -c given twice (or more): also compact before indexing
	PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt})
		if $opt->{compact} >= 2;

	$ibx->{-no_fsync} = 1 unless $opt->{fsync};
	$ibx->{-skip_docdata} //= $opt->{'skip-docdata'};

	# A per-inbox indexSequentialShard setting overrides the command-line
	# value; the override lives in a shallow copy so $opt stays untouched.
	my $ibx_opt = $opt;
	my $seq = $ibx->{lc('indexSequentialShard')};
	if (defined $seq) {
		my $bool = $cfg->git_bool($seq);
		defined($bool) or die <<EOL;
publicInbox.$ibx->{name}.indexSequentialShard not boolean
EOL
		$ibx_opt = { %$opt, sequential_shard => $bool };
	}

	my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
	last if $ibx_opt->{quit};

	my $copt = $opt->{compact_opt};
	if ($copt) {
		# jobs is forced to 0 only for the duration of this block
		local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
		PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
	}
	last if $ibx_opt->{quit};

	# unconfigured inboxes and inboxes with a false index_inbox() result
	# are not attached to any extindex
	next if $ibx->{-unconfigured};
	next unless $nidx;
	foreach my $ei (@eidx) {
		$ei->attach_inbox($ibx);
	}
}
# Finally, sync every extindex; stop early once $opt->{quit} is set.
my $pr = $opt->{-progress};
foreach my $es (@eidx) {
	if ($pr) {
		$pr->("indexing $es->{topdir} ...\n");
	}
	$es->eidx_sync($opt);
	last if $opt->{quit};
}
debug log:

solving c68f9224 ...
found c68f9224 in https://80x24.org/public-inbox.git

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git