# Copyright (C) 2017 all contributors
# License: AGPL-3.0+
#
# Read-only search interface for use by the Repobrowse web interface
# RepoGitSearchIdx builds upon this for writing a Xapian DB.
package PublicInbox::RepoGitSearch;
use strict;
use warnings;
use Search::Xapian qw/:standard/;
# values for ranges and sorting
use constant {
CD => 0, # commit date stamp (YYYYMMDD)
AD => 1, # author date stamp (YYYYMMDD)
REPO_SCHEMA_VERSION => 1,
# n.b. FLAG_PURE_NOT is expensive not suitable for a public website
# as it could become a denial-of-service vector
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
our $LANG = 'english';
my %bool_pfx_internal = (
type => 'T', # "commit", "tag", or "ref"
);
my %bool_pfx_external = ();
my %prob_prefix = (
id => 'Q', # git object ID, partial matches supported
p => 'XP', # parent commit (partial)
s => 'S', # subject
a => 'A', # Author name + email
c => 'XC', # Committer name + email
ac => 'A XC', # Author and Committer name + email
b => 'XBODY', # commit message body
bs => 'S XBODY', # commit message (subject + body)
diff_fn => 'XDFN', # changed filenames
diff_hdr => 'XDHH', # diff hunk header
diff_ctx => 'XDCTX', # diff context
diff_a => 'XDFA', # diff a/ file (before)
diff_b => 'XDFB', # diff b/ file (after)
diff => 'XDFN XDHH XDCTX XDFA XDFB', # entire diff
preimg => 'XPRE', # blob pre-image (full)
postimg => 'XPOST', # blob post-image (full)
# default:
'' => 'Q XP S A XC XBODY XDFN XDHH XDCTX XDFA XDFB XPRE XPOST',
);
our @HELP = (
's:' => 'match within message subject e.g. s:"a quick brown fox"',
'ad:' => < 'Committer date range as YYYYMMDD, see ad: above',
'b:' => 'match within commit message body',
'bs:' => 'match within the commit message subject and body',
);
chomp @HELP;
sub new {
my ($class, $git_dir, $repo_dir) = @_;
$repo_dir ||= "$git_dir/public-inbox";
my $xdir = "$repo_dir/xr".REPO_SCHEMA_VERSION;
bless { git_dir => $git_dir, xdir => $xdir }, $class;
}
# overriden by RepoGitSearchIdx
sub xdb ($) { $_[0]->{xdb} ||= Search::Xapian::Database->new($_[0]->{xdir}) }
sub retry_reopen ($$) {
my ($self, $cb) = @_;
my $ret;
for (1..3) {
eval { $ret = $cb->() };
return $ret unless $@;
# Exception: The revision being read has been discarded -
# you should call Xapian::Database::reopen()
if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
$self->{xdb}->reopen;
} else {
die;
}
}
}
sub _enquire_once ($$$) {
my ($self, $query, $opts) = @_;
my $enq = $self->{enquire} ||= Search::Xapian::Enquire->new($self->xdb);
$enq->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
if ($opts->{relevance}) {
$enq->set_sort_by_relevance_then_value(AD, $desc);
} else {
$enq->set_sort_by_value_then_relevance(AD, $desc);
}
my $offset = $opts->{offset} || 0;
my $limit = $opts->{limit} || 50;
$enq->get_mset($offset, $limit);
}
sub _do_enquire ($$$) {
my ($self, $query, $opts) = @_;
retry_reopen($self, sub { _enquire_once($self, $query, $opts) });
}
sub stemmer () { Search::Xapian::Stem->new($LANG) }
# read-only
sub qp ($) {
my ($self) = @_;
my $qp = $self->{query_parser};
return $qp if $qp;
# new parser
$qp = Search::Xapian::QueryParser->new;
$qp->set_default_op(OP_AND);
$qp->set_database($self->xdb);
$qp->set_stemmer(stemmer());
$qp->set_stemming_strategy(STEM_SOME);
$qp->add_valuerangeprocessor(
Search::Xapian::NumberValueRangeProcessor->new(AD, 'ad:'));
$qp->add_valuerangeprocessor(
Search::Xapian::NumberValueRangeProcessor->new(CD, 'cd:'));
while (my ($name, $prefix) = each %bool_pfx_external) {
$qp->add_boolean_prefix($name, $prefix);
}
while (my ($name, $prefix) = each %prob_prefix) {
$qp->add_prefix($name, $_) foreach split(/ /, $prefix);
}
$self->{query_parser} = $qp;
}
# returns begin and end PostingIterator
sub find_docids ($$) {
my ($self, $termval) = @_;
my $db = $self->xdb;
($db->postlist_begin($termval), $db->postlist_end($termval));
}
sub find_unique_docid ($$$) {
my ($self, $termval) = @_;
my ($begin, $end) = find_docids($self, $termval);
return undef if $begin->equal($end); # not found
my $rv = $begin->get_docid;
# sanity check
$begin->inc;
$begin->equal($end) or die "Term '$termval' is not unique\n";
$rv;
}
sub help ($) {
my ($self) = @_;
\@HELP;
}
# read-only
sub query {
my ($self, $query_string, $opts) = @_;
my $query;
$opts ||= {};
unless ($query_string eq '') {
$query = qp($self)->parse_query($query_string, QP_FLAGS);
$opts->{relevance} = 1 unless exists $opts->{relevance};
}
_do_enquire($self, $query, $opts);
}
1;