#!/usr/bin/perl -w
# Copyright (C) 2015-2018 all contributors
# License: AGPL-3.0+
# Basic tool to create a Xapian search index for a git repository
# configured for public-inbox.
# Usage with libeatmydata
# highly recommended: eatmydata public-inbox-index REPO_DIR
use strict;
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
my $usage = "public-inbox-index REPO_DIR";
use PublicInbox::Config;
use PublicInbox::Admin qw(resolve_repo_dir);

# A missing/unloadable config is not fatal: warn and carry on with $config
# left undefined, so directories are indexed without configured inbox objects.
my $config = eval { PublicInbox::Config->new } || eval {
    warn "public-inbox unconfigured for serving, indexing anyways...\n";
    undef;
};

# The indexer itself is mandatory; bail out early if it cannot be loaded.
eval { require PublicInbox::SearchIdx };
if ($@) {
    print STDERR "Search::Xapian required for $0\n";
    exit 1;
}

my $reindex;
my $prune;
my $jobs = undef;
my %opts = (
    '--reindex' => \$reindex,
    '--jobs|j=i' => \$jobs,
    '--prune' => \$prune,
);

# Trailing newline keeps Perl from appending " at FILE line N." to the usage.
GetOptions(%opts) or die "bad command-line args\n$usage\n";

# 0 is accepted (index_dir() sets the v2 writer's `parallel' field to 0 for
# it), so only negative values are rejected.
die "--jobs must be non-negative\n" if defined $jobs && $jobs < 0;

# Resolve every directory given on the command line; with no arguments,
# resolve_repo_dir() is called without one and picks the directory itself.
my @dirs;
if (@ARGV) {
    @dirs = map { resolve_repo_dir($_) } @ARGV;
}
else {
    @dirs = (resolve_repo_dir());
}

# Print the usage string to STDERR and exit with status 1.
sub usage {
    print STDERR "Usage: $usage\n";
    exit 1;
}
usage() unless @dirs;

# Swap each plain directory string for the configured inbox object whose
# {mainrepo} matches it exactly (string comparison).
defined($config) and $config->each_inbox(sub {
    my ($ibx) = @_;
    for my $i (0..$#dirs) {
        next if $dirs[$i] ne $ibx->{mainrepo};
        $dirs[$i] = $ibx;
    }
});

foreach my $dir (@dirs) {
    # Still a plain string => not matched by the config.  An inbox.lock
    # file marks a v2 inbox, which needs an inbox object.
    if (!ref($dir) && -f "$dir/inbox.lock") { # v2
        my $ibx = { mainrepo => $dir, name => 'unnamed' };
        $dir = PublicInbox::Inbox->new($ibx);
    }
    index_dir($dir);
}

# index_dir($repo)
#
# Index one inbox.  $repo is either a plain directory path or an inbox
# object (hash-based, with an optional {version} field).
#
# - Inbox objects with {version} == 2 are synced via PublicInbox::V2Writable,
#   honouring --reindex, --prune and --jobs.
# - Everything else is synced via PublicInbox::SearchIdx, honouring --reindex.
#
# Dies if a plain path is not a directory, if V2Writable cannot be loaded,
# or if constructing the writer fails (the original error is propagated).
sub index_dir {
    my ($repo) = @_;
    if (!ref $repo && ! -d $repo) {
        die "$repo does not appear to be an inbox repository\n";
    }
    if (ref($repo) && ($repo->{version} || 1) == 2) {
        eval { require PublicInbox::V2Writable };
        die "v2 requirements not met: $@\n" if $@;

        # do-BLOCK rather than eval: it still confines the `local' override
        # of NPROC to the constructor call, but lets a constructor failure
        # propagate instead of leaving $v2w undefined.
        # NOTE(review): presumably V2Writable->new consults $ENV{NPROC} to
        # size its worker pool -- confirm against that module.
        my $v2w = do {
            $jobs and local $ENV{NPROC} = $jobs;
            PublicInbox::V2Writable->new($repo);
        };
        if (defined $jobs) {
            if ($jobs == 0) {
                $v2w->{parallel} = 0;
            }
            else {
                # Only warn: the existing partition count is not changed here.
                my $n = $v2w->{partitions};
                if ($jobs != ($n + 1)) {
                    warn "Unable to respect --jobs=$jobs, inbox was created with $n partitions\n";
                }
            }
        }
        $v2w->index_sync({ reindex => $reindex, prune => $prune });
    }
    else {
        my $s = PublicInbox::SearchIdx->new($repo, 1);
        $s->index_sync({ reindex => $reindex });
    }
}