From a796afa762b6e204f611a69833064bf3656d1a62 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:05 +0000 Subject: inbox: add ->over method to ease access One small step towards making installing Xapian optional for v2 and providing more WWW and NNTP functionality without it. --- lib/PublicInbox/Inbox.pm | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 286555f6..e3bc1048 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -168,12 +168,20 @@ sub mm { }; } -sub search { - my ($self) = @_; - $self->{search} ||= eval { +sub search ($;$) { + my ($self, $over_only) = @_; + my $srch = $self->{search} ||= eval { _cleanup_later($self); PublicInbox::Search->new($self, $self->{altid}); }; + # TODO: lazily load Xapian + # return $srch if $over_only || eval { $srch->xdb }; + # undef; +} + +sub over ($) { + my $srch = search($_[0], 1) or return; + $srch->{over_ro}; } sub try_cat { @@ -280,7 +288,7 @@ sub nntp_url { sub nntp_usable { my ($self) = @_; - my $ret = $self->mm && $self->search; + my $ret = mm($self) && over($self); $self->{mm} = $self->{search} = undef; $ret; } @@ -322,30 +330,32 @@ sub mid2num($$) { sub smsg_by_mid ($$) { my ($self, $mid) = @_; - my $srch = search($self) or return; + my $over = over($self) or return; # favor the Message-ID we used for the NNTP article number: defined(my $num = mid2num($self, $mid)) or return; - my $smsg = $srch->lookup_article($num) or return; + my $smsg = $over->get_art($num) or return; PublicInbox::SearchMsg::psgi_cull($smsg); } sub msg_by_mid ($$;$) { my ($self, $mid, $ref) = @_; - my $srch = search($self) or + + over($self) or return msg_by_path($self, mid2path($mid), $ref); + my $smsg = smsg_by_mid($self, $mid); $smsg ? 
msg_by_smsg($self, $smsg, $ref) : undef; } sub recent { my ($self, $opts, $after, $before) = @_; - search($self)->{over_ro}->recent($opts, $after, $before); + over($self)->recent($opts, $after, $before); } sub modified { my ($self) = @_; - if (my $srch = search($self)) { - my $msgs = $srch->{over_ro}->recent({limit => 1}); + if (my $over = over($self)) { + my $msgs = $over->recent({limit => 1}); if (my $smsg = $msgs->[0]) { return $smsg->{ts}; } -- cgit v1.2.3-24-ge0c7 From e981538130dd6d285392f4316328edbadab0d330 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:06 +0000 Subject: nntp: use Inbox->over directly None of the NNTP code actually relies on Xapian, anymore. --- lib/PublicInbox/NNTP.pm | 40 ++++++++++++++++++++-------------------- lib/PublicInbox/Search.pm | 11 ----------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 5c5df7b0..8cb6c56d 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -326,27 +326,27 @@ sub cmd_newnews ($$$$;$$) { my ($keep, $skip) = split('!', $newsgroups, 2); ngpat2re($keep); ngpat2re($skip); - my @srch; + my @over; foreach my $ng (@{$self->{nntpd}->{grouplist}}) { $ng->{newsgroup} =~ $keep or next; $ng->{newsgroup} =~ $skip and next; - my $srch = $ng->search or next; - push @srch, $srch; + my $over = $ng->over or next; + push @over, $over; }; - return '.' unless @srch; + return '.' unless @over; my $prev = 0; long_response($self, sub { - my $srch = $srch[0]; - my $msgs = $srch->query_ts($ts, $prev); + my $over = $over[0]; + my $msgs = $over->query_ts($ts, $prev); if (scalar @$msgs) { more($self, '<' . join(">\r\n<", map { $_->mid } @$msgs ). '>'); $prev = $msgs->[-1]->{num}; } else { - shift @srch; - if (@srch) { # continue onto next newsgroup + shift @over; + if (@over) { # continue onto next newsgroup $prev = 0; return 1; } else { # break out of the long response. 
@@ -483,7 +483,7 @@ find_mid: defined $mid or return $err; } found: - my $smsg = $ng->search->{over_ro}->get_art($n) or return $err; + my $smsg = $ng->over->get_art($n) or return $err; my $msg = $ng->msg_by_smsg($smsg) or return $err; my $s = Email::Simple->new($msg); if ($set_headers) { @@ -706,9 +706,9 @@ sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin } } -sub search_header_for { - my ($srch, $num, $field) = @_; - my $smsg = $srch->{over_ro}->get_art($num) or return; +sub over_header_for { + my ($over, $num, $field) = @_; + my $smsg = $over->get_art($num) or return; return PublicInbox::SearchMsg::date($smsg) if $field eq 'date'; $smsg->{$field}; } @@ -718,11 +718,11 @@ sub hdr_searchmsg ($$$$) { if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID my ($ng, $n) = mid_lookup($self, $1); return r430 unless defined $n; - my $v = search_header_for($ng->search, $n, $field); + my $v = over_header_for($ng->over, $n, $field); hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); } else { # numeric range $range = $self->{article} unless defined $range; - my $srch = $self->{ng}->search; + my $over = $self->{ng}->over; my $mm = $self->{ng}->mm; my $r = get_range($self, $range); return $r unless ref $r; @@ -730,7 +730,7 @@ sub hdr_searchmsg ($$$$) { more($self, $xhdr ? 
r221 : r225); my $cur = $beg; long_response($self, sub { - my $msgs = $srch->query_xover($cur, $end); + my $msgs = $over->query_xover($cur, $end); my $nr = scalar @$msgs or return; my $tmp = ''; foreach my $s (@$msgs) { @@ -810,11 +810,11 @@ sub cmd_xrover ($;$) { return $r unless ref $r; my ($beg, $end) = @$r; my $mm = $ng->mm; - my $srch = $ng->search; + my $over = $ng->over; more($self, '224 Overview information follows'); long_response($self, sub { - my $h = search_header_for($srch, $beg, 'references'); + my $h = over_header_for($over, $beg, 'references'); more($self, "$beg $h") if defined($h); $beg++ < $end; }); @@ -842,7 +842,7 @@ sub cmd_over ($;$) { if ($range && $range =~ /\A<(.+)>\z/) { my ($ng, $n) = mid_lookup($self, $1); defined $n or return r430; - my $smsg = $ng->search->{over_ro}->get_art($n) or return r430; + my $smsg = $ng->over->get_art($n) or return r430; more($self, '224 Overview information follows (multi-line)'); # Only set article number column if it's the current group @@ -862,10 +862,10 @@ sub cmd_xover ($;$) { return $r unless ref $r; my ($beg, $end) = @$r; more($self, "224 Overview information follows for $beg to $end"); - my $srch = $self->{ng}->search; + my $over = $self->{ng}->over; my $cur = $beg; long_response($self, sub { - my $msgs = $srch->query_xover($cur, $end); + my $msgs = $over->query_xover($cur, $end); my $nr = scalar @$msgs or return; # OVERVIEW.FMT diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 090d998b..760c660b 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -293,17 +293,6 @@ EOF $self->{query_parser} = $qp; } -# only used for NNTP server -sub query_xover { - my ($self, $beg, $end, $offset) = @_; - $self->{over_ro}->query_xover($beg, $end, $offset); -} - -sub query_ts { - my ($self, $ts, $prev) = @_; - $self->{over_ro}->query_ts($ts, $prev); -} - sub lookup_article { my ($self, $num) = @_; $self->{over_ro}->get_art($num); -- cgit v1.2.3-24-ge0c7 From 
70caf43a131fc5bdf7104f82f2acee9d5353d6a8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:07 +0000 Subject: www: use Inbox->over where appropriate We don't need to rely on Xapian search functionality for the majority of the WWW code, even. subject_normalized is moved to SearchMsg, where it (probably) makes more sense, anyways. --- lib/PublicInbox/Feed.pm | 11 +++++---- lib/PublicInbox/Inbox.pm | 1 + lib/PublicInbox/Mbox.pm | 25 +++++++++++--------- lib/PublicInbox/OverIdx.pm | 13 +++++++++-- lib/PublicInbox/Search.pm | 30 ------------------------ lib/PublicInbox/SearchMsg.pm | 14 ++++++++++++ lib/PublicInbox/SearchView.pm | 13 ++++++----- lib/PublicInbox/View.pm | 53 ++++++++++++++++++++----------------------- lib/PublicInbox/WWW.pm | 32 ++++++++------------------ t/search.t | 4 ++-- t/v2writable.t | 2 +- t/view.t | 1 + 12 files changed, 90 insertions(+), 109 deletions(-) diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index b373a1eb..a04838a1 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -27,10 +27,10 @@ sub generate { sub generate_thread_atom { my ($ctx) = @_; my $mid = $ctx->{mid}; - my $msgs = $ctx->{srch}->get_thread($mid); + my $ibx = $ctx->{-inbox}; + my $msgs = $ibx->over->get_thread($mid); return _no_thread() unless @$msgs; - my $ibx = $ctx->{-inbox}; my $html_url = $ibx->base_url($ctx->{env}); $html_url .= PublicInbox::Hval->new_msgid($mid)->{href}; $ctx->{-html_url} = $html_url; @@ -46,12 +46,13 @@ sub generate_html_index { # if the 'r' query parameter is given, it is a legacy permalink # which we must continue supporting: my $qp = $ctx->{qp}; - if ($qp && !$qp->{r} && $ctx->{srch}) { + my $ibx = $ctx->{-inbox}; + if ($qp && !$qp->{r} && $ibx->over) { return PublicInbox::View::index_topics($ctx); } my $env = $ctx->{env}; - my $url = $ctx->{-inbox}->base_url($env) . 'new.html'; + my $url = $ibx->base_url($env) . 
'new.html'; my $qs = $env->{QUERY_STRING}; $url .= "?$qs" if $qs ne ''; [302, [ 'Location', $url, 'Content-Type', 'text/plain'], @@ -94,7 +95,7 @@ sub recent_msgs { if ($v > 2) { die "BUG: unsupported inbox version: $v\n"; } - if (my $srch = $ibx->search) { + if ($ibx->over) { return PublicInbox::View::paginate_recent($ctx, $max); } diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index e3bc1048..dc186b73 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -172,6 +172,7 @@ sub search ($;$) { my ($self, $over_only) = @_; my $srch = $self->{search} ||= eval { _cleanup_later($self); + require PublicInbox::Search; PublicInbox::Search->new($self, $self->{altid}); }; # TODO: lazily load Xapian diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 78dbe27e..15200d3a 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -45,7 +45,7 @@ sub getline { } $cur = $next or return; my $ibx = $ctx->{-inbox}; - $next = $ibx->search->next_by_mid($ctx->{mid}, \$id, \$prev); + $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev); @$more = ($ctx, $id, $prev, $next); # $next may be undef, here my $mref = $ibx->msg_by_smsg($cur) or return; msg_str($ctx, Email::Simple->new($mref)); @@ -59,12 +59,12 @@ sub emit_raw { my $ibx = $ctx->{-inbox}; my $first; my $more; - if (my $srch = $ibx->search) { + if (my $over = $ibx->over) { my ($id, $prev); - my $smsg = $srch->next_by_mid($mid, \$id, \$prev) or return; + my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return; my $mref = $ibx->msg_by_smsg($smsg) or return; $first = Email::Simple->new($mref); - my $next = $srch->next_by_mid($mid, \$id, \$prev); + my $next = $over->next_by_mid($mid, \$id, \$prev); # $more is for ->getline $more = [ $ctx, $id, $prev, $next, $first ] if $next; } else { @@ -130,11 +130,11 @@ sub msg_str { } sub thread_mbox { - my ($ctx, $srch, $sfx) = @_; + my ($ctx, $over, $sfx) = @_; eval { require IO::Compress::Gzip }; return sub { need_gzip(@_) } 
if $@; my $mid = $ctx->{mid}; - my $msgs = $srch->get_thread($mid, {}); + my $msgs = $over->get_thread($mid, {}); return [404, [qw(Content-Type text/plain)], []] if !@$msgs; my $prev = $msgs->[-1]; my $i = 0; @@ -144,7 +144,7 @@ sub thread_mbox { return $smsg; } # refill result set - $msgs = $srch->get_thread($mid, $prev); + $msgs = $over->get_thread($mid, $prev); return unless @$msgs; $prev = $msgs->[-1]; $i = 0; @@ -168,17 +168,19 @@ sub emit_range { sub mbox_all_ids { my ($ctx) = @_; my $prev = 0; - my $ids = $ctx->{-inbox}->mm->ids_after(\$prev) or return + my $ibx = $ctx->{-inbox}; + my $ids = $ibx->mm->ids_after(\$prev) or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; my $i = 0; - my $over = $ctx->{srch}->{over_ro}; + my $over = $ibx->over or + return PublicInbox::WWW::need($ctx, 'Overview'); my $cb = sub { do { while ((my $num = $ids->[$i++])) { my $smsg = $over->get_art($num) or next; return $smsg; } - $ids = $ctx->{-inbox}->mm->ids_after(\$prev); + $ids = $ibx->mm->ids_after(\$prev); $i = 0; } while (@$ids); undef; @@ -193,7 +195,8 @@ sub mbox_all { return sub { need_gzip(@_) } if $@; return mbox_all_ids($ctx) if $query eq ''; my $opts = { mset => 2 }; - my $srch = $ctx->{srch}; + my $srch = $ctx->{-inbox}->search or + return PublicInbox::WWW::need($ctx, 'Search');; my $mset = $srch->query($query, $opts); $opts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index bb3068dd..9fc51e5f 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -14,7 +14,7 @@ use base qw(PublicInbox::Over); use IO::Handle; use DBI qw(:sql_types); # SQL_BLOB use PublicInbox::MID qw/id_compress mids references/; -use PublicInbox::SearchMsg; +use PublicInbox::SearchMsg qw(subject_normalized); use Compress::Zlib qw(compress); use PublicInbox::Search; @@ -237,6 +237,15 @@ sub parse_references ($$$) { \@keep; } +# normalize subjects so they are suitable 
as pathnames for URLs +# XXX: consider for removal +sub subject_path ($) { + my ($subj) = @_; + $subj = subject_normalized($subj); + $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g; + lc($subj); +} + sub add_overview { my ($self, $mime, $bytes, $num, $oid, $mid0) = @_; my $lines = $mime->body_raw =~ tr!\n!\n!; @@ -252,7 +261,7 @@ sub add_overview { my $subj = $smsg->subject; my $xpath; if ($subj ne '') { - $xpath = PublicInbox::Search::subject_path($subj); + $xpath = subject_path($subj); $xpath = id_compress($xpath); } my $dd = $smsg->to_doc_data($oid, $mid0); diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 760c660b..e79ec0f8 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -20,7 +20,6 @@ use PublicInbox::Over; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails -our $REPLY_RE = qr/^re:\s+/i; our $LANG = 'english'; use constant { @@ -182,11 +181,6 @@ sub query { } } -sub get_thread { - my ($self, $mid, $prev) = @_; - $self->{over_ro}->get_thread($mid, $prev); -} - sub retry_reopen { my ($self, $cb) = @_; for my $i (1..10) { @@ -298,30 +292,6 @@ sub lookup_article { $self->{over_ro}->get_art($num); } -sub next_by_mid { - my ($self, $mid, $id, $prev) = @_; - $self->{over_ro}->next_by_mid($mid, $id, $prev); -} - -# normalize subjects so they are suitable as pathnames for URLs -# XXX: consider for removal -sub subject_path { - my $subj = pop; - $subj = subject_normalized($subj); - $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g; - lc($subj); -} - -sub subject_normalized { - my $subj = pop; - $subj =~ s/\A\s+//s; # no leading space - $subj =~ s/\s+\z//s; # no trailing space - $subj =~ s/\s+/ /gs; # no redundant spaces - $subj =~ s/\.+\z//; # no trailing '.' 
- $subj =~ s/$REPLY_RE//igo; # remove reply prefix - $subj; -} - sub help { my ($self) = @_; $self->qp; # parse altids diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index ceb6edad..5f3c8af8 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -8,6 +8,8 @@ package PublicInbox::SearchMsg; use strict; use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw(subject_normalized); use PublicInbox::MID qw/mid_clean mid_mime/; use PublicInbox::Address; use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); @@ -185,4 +187,16 @@ sub mid ($;$) { sub _extract_mid { mid_clean(mid_mime($_[0]->{mime})) } +our $REPLY_RE = qr/^re:\s+/i; + +sub subject_normalized ($) { + my ($subj) = @_; + $subj =~ s/\A\s+//s; # no leading space + $subj =~ s/\s+\z//s; # no trailing space + $subj =~ s/\s+/ /gs; # no redundant spaces + $subj =~ s/\.+\z//; # no trailing '.' + $subj =~ s/$REPLY_RE//igo; # remove reply prefix + $subj; +} + 1; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 3b3afdee..6592b3b2 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -29,6 +29,8 @@ sub mbox_results { sub sres_top_html { my ($ctx) = @_; + my $srch = $ctx->{-inbox}->search or + return PublicInbox::WWW::need($ctx, 'Search'); my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $x = $q->{x}; my $query = $q->{'q'}; @@ -44,7 +46,7 @@ sub sres_top_html { my ($mset, $total, $err, $cb); retry: eval { - $mset = $ctx->{srch}->query($query, $opts); + $mset = $srch->query($query, $opts); $total = $mset->get_matches_estimated; }; $err = $@; @@ -98,8 +100,8 @@ sub mset_summary { my $pad = length("$total"); my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); - my $srch = $ctx->{srch}; my $ibx = $ctx->{-inbox}; + my $srch = $ibx->search; my $obfs_ibx = $ibx->{obfuscate} ? 
$ibx : undef; foreach my $m ($mset->items) { my $rank = sprintf("%${pad}d", $m->get_rank + 1); @@ -220,8 +222,8 @@ sub sort_relevance { sub mset_thread { my ($ctx, $mset, $q) = @_; my %pct; - my $srch = $ctx->{srch}; - my $msgs = $srch->retry_reopen(sub { [ map { + my $ibx = $ctx->{-inbox}; + my $msgs = $ibx->search->retry_reopen(sub { [ map { my $i = $_; my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document); $pct{$smsg->mid} = $i->get_percent; @@ -232,7 +234,6 @@ sub mset_thread { $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds, $ctx); my $skel = search_nav_bot($mset, $q). "
";
-	my $ibx = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
 	$ctx->{anchor_idx} = 1;
 	$ctx->{cur_level} = 0;
@@ -286,7 +287,7 @@ sub adump {
 	my $ibx = $ctx->{-inbox};
 	my @items = $mset->items;
 	$ctx->{search_query} = $q;
-	my $srch = $ctx->{srch};
+	my $srch = $ibx->search;
 	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
 		while (my $x = shift @items) {
 			$x = load_doc_retry($srch, $x);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 47a2046e..09afdaf1 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -18,7 +18,7 @@ use PublicInbox::Reply;
 use PublicInbox::ViewDiff qw(flush_diff);
 require POSIX;
 use Time::Local qw(timegm);
-
+use PublicInbox::SearchMsg qw(subject_normalized);
 use constant COLS => 72;
 use constant INDENT => '  ';
 use constant TCHILD => '` ';
@@ -63,12 +63,12 @@ sub msg_page {
 	my $ibx = $ctx->{-inbox};
 	my ($first, $more);
 	my $smsg;
-	if (my $srch = $ibx->search) {
+	if (my $over = $ibx->over) {
 		my ($id, $prev);
-		$smsg = $srch->next_by_mid($mid, \$id, \$prev);
+		$smsg = $over->next_by_mid($mid, \$id, \$prev);
 		$first = $ibx->msg_by_smsg($smsg) if $smsg;
 		if ($first) {
-			my $next = $srch->next_by_mid($mid, \$id, \$prev);
+			my $next = $over->next_by_mid($mid, \$id, \$prev);
 			$more = [ $id, $prev, $next ] if $next;
 		}
 		return unless $first;
@@ -85,7 +85,7 @@ sub msg_html_more {
 		my $mid = $ctx->{mid};
 		my $ibx = $ctx->{-inbox};
 		$smsg = $ibx->smsg_mime($smsg);
-		my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
+		my $next = $ibx->over->next_by_mid($mid, \$id, \$prev);
 		@$more = $next ? ($id, $prev, $next) : ();
 		if ($smsg) {
 			my $mime = $smsg->{mime};
@@ -203,7 +203,6 @@ sub nr_to_s ($$$) {
 # this is already inside a <pre>
 sub index_entry {
 	my ($smsg, $ctx, $more) = @_;
-	my $srch = $ctx->{srch};
 	my $subj = $smsg->subject;
 	my $mid_raw = $smsg->mid;
 	my $id = id_compress($mid_raw, 1);
@@ -440,8 +439,8 @@ sub stream_thread ($$) {
 sub thread_html {
 	my ($ctx) = @_;
 	my $mid = $ctx->{mid};
-	my $srch = $ctx->{srch};
-	my ($nr, $msgs) = $srch->get_thread($mid);
+	my $ibx = $ctx->{-inbox};
+	my ($nr, $msgs) = $ibx->over->get_thread($mid);
 	return missing_thread($ctx) if $nr == 0;
 	my $skel = '<hr><pre>';
 	$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
@@ -464,7 +463,6 @@ sub thread_html {
 	my $rootset = thread_results($ctx, $msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
-	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	walk_thread($rootset, $ctx, *pre_thread);
 
@@ -627,8 +625,8 @@ sub add_text_body {
 
 sub _msg_html_prepare {
 	my ($hdr, $ctx, $more, $nr) = @_;
-	my $srch = $ctx->{srch} if $ctx;
 	my $atom = '';
+	my $over = $ctx->{-inbox}->over;
 	my $obfs_ibx = $ctx->{-obfs_ibx};
 	my $rv = '';
 	my $mids = mids($hdr);
@@ -642,7 +640,7 @@ sub _msg_html_prepare {
 	} else {
 		$rv .= '<pre>';
 	}
-	if ($srch) {
+	if ($over) {
 		$ctx->{-upfx} = '../';
 	}
 	my @title;
@@ -668,14 +666,14 @@ sub _msg_html_prepare {
 	if (defined($v = $hdr->header('Subject')) && ($v ne '')) {
 		$v = ascii_html($v);
 		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-		if ($srch) {
+		if ($over) {
 			$rv .= qq(Subject: $v\n);
 		} else {
 			$rv .= "Subject: $v\n";
 		}
 		$title[0] = $v;
 	} else { # dummy anchor for thread skeleton at bottom of page
-		$rv .= qq() if $srch;
+		$rv .= qq() if $over;
 		$title[0] = '(no subject)';
 	}
 	if (defined($v = $hdr->header('Date'))) {
@@ -698,15 +696,15 @@ sub _msg_html_prepare {
 			$rv .= "(raw)\n";
 		}
 	}
-	$rv .= _parent_headers($hdr, $srch);
+	$rv .= _parent_headers($hdr, $over);
 	$rv .= "\n";
 }
 
 sub thread_skel {
 	my ($dst, $ctx, $hdr, $tpfx) = @_;
-	my $srch = $ctx->{srch};
 	my $mid = mids($hdr)->[0];
-	my ($nr, $msgs) = $srch->get_thread($mid);
+	my $ibx = $ctx->{-inbox};
+	my ($nr, $msgs) = $ibx->over->get_thread($mid);
 	my $expand = qq(expand[flat) .
 	                qq(|nested]  ) .
 			qq(mbox.gz  ) .
@@ -732,14 +730,13 @@ sub thread_skel {
 	my $subj = $hdr->header('Subject');
 	defined $subj or $subj = '';
 	$subj = '(no subject)' if $subj eq '';
-	$ctx->{prev_subj} = [ split(/ /, $srch->subject_normalized($subj)) ];
+	$ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
 	$ctx->{cur} = $mid;
 	$ctx->{prev_attr} = '';
 	$ctx->{prev_level} = 0;
 	$ctx->{dst} = $dst;
 
 	# reduce hash lookups in skel_dump
-	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	walk_thread(thread_results($ctx, $msgs), $ctx, *skel_dump);
 
@@ -747,7 +744,7 @@ sub thread_skel {
 }
 
 sub _parent_headers {
-	my ($hdr, $srch) = @_;
+	my ($hdr, $over) = @_;
 	my $rv = '';
 
 	my $refs = references($hdr);
@@ -762,10 +759,10 @@ sub _parent_headers {
 
 	# do not display References: if search is present,
 	# we show the thread skeleton at the bottom, instead.
-	return $rv if $srch;
+	return $rv if $over;
 
 	if (@$refs) {
-		@$refs = map { linkify_ref_nosrch($_) } @$refs;
+		@$refs = map { linkify_ref_no_over($_) } @$refs;
 		$rv .= 'References: '. join("\n\t", @$refs) . "\n";
 	}
 	$rv;
@@ -774,12 +771,12 @@ sub _parent_headers {
 sub html_footer {
 	my ($hdr, $standalone, $ctx, $rhref) = @_;
 
-	my $srch = $ctx->{srch} if $ctx;
+	my $ibx = $ctx->{-inbox} if $ctx;
 	my $upfx = '../';
 	my $tpfx = '';
 	my $idx = $standalone ? " index" : '';
 	my $irt = '';
-	if ($idx && $srch) {
+	if ($idx && $ibx->over) {
 		$idx .= "\n";
 		thread_skel(\$idx, $ctx, $hdr, $tpfx);
 		my ($next, $prev);
@@ -819,7 +816,7 @@ sub html_footer {
 	$irt .= $idx;
 }
 
-sub linkify_ref_nosrch {
+sub linkify_ref_no_over {
 	my $v = PublicInbox::Hval->new_msgid($_[0]);
 	my $html = $v->as_html;
 	my $href = $v->{href};
@@ -965,7 +962,7 @@ sub skel_dump {
 	# Subject is never undef, this mail was loaded from
 	# our Xapian which would've resulted in '' if it were
 	# really missing (and Filter rejects empty subjects)
-	my @subj = split(/ /, $ctx->{srch}->subject_normalized($smsg->subject));
+	my @subj = split(/ /, subject_normalized($smsg->subject));
 
 	# remove common suffixes from the subject if it matches the previous,
 	# so we do not show redundant text at the end.
@@ -1034,14 +1031,13 @@ sub sort_ds {
 # returns 200 if done, 404 if not
 sub acc_topic {
 	my ($ctx, $level, $node) = @_;
-	my $srch = $ctx->{srch};
 	my $mid = $node->{id};
 	my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
 	my ($subj, $ds);
 	my $topic;
 	if ($x) {
 		$subj = $x->subject;
-		$subj = $srch->subject_normalized($subj);
+		$subj = subject_normalized($subj);
 		$subj = '(no subject)' if $subj eq '';
 		$ds = $x->ds;
 		if ($level == 0) {
@@ -1081,7 +1077,6 @@ sub dump_topics {
 	my @out;
 	my $ibx = $ctx->{-inbox};
 	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
-	my $srch = $ctx->{srch};
 
 	# sort by recency, this allows new posts to "bump" old topics...
 	foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
@@ -1113,7 +1108,7 @@ sub dump_topics {
 			my $level = $ex[$i];
 			my $subj = $ex[$i + 1];
 			$mid = delete $seen->{$subj};
-			my @subj = split(/ /, $srch->subject_normalized($subj));
+			my @subj = split(/ /, subject_normalized($subj));
 			my @next_prev = @subj; # full copy
 			my $omit = dedupe_subject($prev_subj, \@subj, ' "');
 			$prev_subj = \@next_prev;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 1f3ca157..0f963dcb 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -168,7 +168,6 @@ sub r404 {
 	my ($ctx) = @_;
 	if ($ctx && $ctx->{mid}) {
 		require PublicInbox::ExtMsg;
-		searcher($ctx);
 		return PublicInbox::ExtMsg::ext_msg($ctx);
 	}
 	r(404, 'Not Found');
@@ -239,7 +238,6 @@ sub get_new {
 sub get_index {
 	my ($ctx) = @_;
 	require PublicInbox::Feed;
-	searcher($ctx);
 	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
 		require PublicInbox::SearchView;
 		PublicInbox::SearchView::sres_top_html($ctx);
@@ -259,14 +257,13 @@ sub get_mid_txt {
 sub get_mid_html {
 	my ($ctx) = @_;
 	require PublicInbox::View;
-	searcher($ctx);
 	PublicInbox::View::msg_page($ctx) || r404($ctx);
 }
 
 # /$INBOX/$MESSAGE_ID/t/
 sub get_thread {
 	my ($ctx, $flat) = @_;
-	searcher($ctx) or return need_search($ctx);
+	$ctx->{-inbox}->over or return need($ctx, 'Overview');
 	$ctx->{flat} = $flat;
 	require PublicInbox::View;
 	PublicInbox::View::thread_html($ctx);
@@ -303,21 +300,11 @@ sub ctx_get {
 	$val;
 }
 
-# search support is optional, returns undef if Xapian is not installed
-# or not configured for the given GIT_DIR
-sub searcher {
-	my ($ctx) = @_;
-	eval {
-		require PublicInbox::Search;
-		$ctx->{srch} = $ctx->{-inbox}->search;
-	};
-}
-
-sub need_search {
-	my ($ctx) = @_;
+sub need {
+	my ($ctx, $extra) = @_;
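 	my $msg = <<EOF;
-<html><head><title>Search not available for this
-public-inbox</title><body><pre>Search is not available for this public-inbox
+<html><head><title>$extra not available for this
+public-inbox</title><body><pre>$extra is not available for this public-inbox
 <a href="../">Return to index</a></pre></body></html>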
EOF [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ]; @@ -330,16 +317,16 @@ EOF # especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { my ($ctx, $sfx) = @_; - my $srch = searcher($ctx) or return need_search($ctx); + my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Mbox; - PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx); + PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx); } # /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed sub get_thread_atom { my ($ctx) = @_; - searcher($ctx) or return need_search($ctx); + $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Feed; PublicInbox::Feed::generate_thread_atom($ctx); } @@ -453,7 +440,7 @@ sub serve_git { sub mbox_results { my ($ctx) = @_; if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { - searcher($ctx) or return need_search($ctx); + $ctx->{-inbox}->search or return need($ctx, 'search'); require PublicInbox::SearchView; return PublicInbox::SearchView::mbox_results($ctx); } @@ -464,7 +451,6 @@ sub serve_mbox_range { my ($ctx, $inbox, $range) = @_; invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; - searcher($ctx); PublicInbox::Mbox::emit_range($ctx, $range); } } diff --git a/t/search.t b/t/search.t index 35d71473..493e00de 100644 --- a/t/search.t +++ b/t/search.t @@ -223,7 +223,7 @@ $ibx->with_umask(sub { $rw_commit->(); $ro->reopen; - my $t = $ro->get_thread('root@s'); + my $t = $ro->{over_ro}->get_thread('root@s'); is(scalar(@$t), 4, "got all 4 mesages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @res = filter_mids($t); @@ -422,7 +422,7 @@ $ibx->with_umask(sub { if (scalar(@$n) >= 1) { my $mid = $n->[0]->mid; my ($id, $prev); - $art = $ro->next_by_mid($mid, \$id, \$prev); + $art = $ro->{over_ro}->next_by_mid($mid, \$id, \$prev); ok($art, 'article exists in OVER DB'); } $rw->unindex_blob($amsg); diff --git a/t/v2writable.t b/t/v2writable.t index 
5f34d127..b0f88d27 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -258,7 +258,7 @@ EOF ok($im->add($mime), 'add excessively long References'); $im->barrier; - my $msgs = $ibx->search->reopen->get_thread('x'x244); + my $msgs = $ibx->search->{over_ro}->get_thread('x'x244); is(2, scalar(@$msgs), 'got both messages'); is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid'); is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref'); diff --git a/t/view.t b/t/view.t index ef7d6958..07829543 100644 --- a/t/view.t +++ b/t/view.t @@ -13,6 +13,7 @@ my $ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'http' }, -inbox => Plack::Util::inline_object( name => 'test', + over => sub { undef }, search => sub { undef }, base_url => sub { 'http://example.com/' }, cloneurl => sub {[]}, -- cgit v1.2.3-24-ge0c7 From 0b1de991a099b5e8b9a9e3e85b5eaaacc9362dbb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:08 +0000 Subject: lazy load Xapian and make it optional for v2 More tests work without Search::Xapian, now. 
Usability issues still need to be fixed --- INSTALL | 2 +- TODO | 2 -- ci/deps.perl | 7 +--- ci/profiles.sh | 3 +- lib/PublicInbox/Inbox.pm | 19 +++++----- lib/PublicInbox/Search.pm | 84 ++++++++++++++++++++++++++------------------ lib/PublicInbox/SearchIdx.pm | 50 +++++++++++++++++--------- lib/PublicInbox/WWW.pm | 7 ++-- script/public-inbox-index | 25 +++++++++---- script/public-inbox-init | 1 + script/public-inbox-purge | 5 +++ t/admin.t | 2 +- t/cgi.t | 16 ++++++--- t/indexlevels-mirror.t | 17 +++++---- t/nntp.t | 2 +- t/nntpd.t | 15 +++++--- t/over.t | 3 +- t/psgi_bad_mids.t | 3 +- t/psgi_scan_all.t | 4 +-- t/psgi_search.t | 4 +-- t/purge.t | 2 +- t/search-thr-index.t | 5 +-- t/search.t | 7 ++-- t/v1reindex.t | 3 +- t/v2reindex.t | 5 +-- t/watch_maildir_v2.t | 4 +-- 26 files changed, 184 insertions(+), 113 deletions(-) diff --git a/INSTALL b/INSTALL index 313a2951..72e0763c 100644 --- a/INSTALL +++ b/INSTALL @@ -69,7 +69,7 @@ Numerous optional modules are likely to be useful as well: - Search::Xapian deb: libsearch-xapian-perl pkg: p5-Search-Xapian rpm: perl-Search-Xapian - (for v2, HTTP search) + (HTTP search) - Net::Server deb: libnet-server-perl pkg: pkg-Net-Server diff --git a/TODO b/TODO index d947b0ff..4953439e 100644 --- a/TODO +++ b/TODO @@ -69,8 +69,6 @@ all need to be considered for everything we introduce) * large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import) -* Allow NNTP and more of PSGI code to work without Xapian - * Read-only WebDAV interface to the git repo so it can be mounted via davfs2 or fusedav to avoid full clones. 
davfs2 needs Range: request support for this to be feasible: diff --git a/ci/deps.perl b/ci/deps.perl index e0fda011..32b0226e 100755 --- a/ci/deps.perl +++ b/ci/deps.perl @@ -70,12 +70,7 @@ if (@precious) { # bare minimum for v2 -$profiles->{v2essential} = [ @{$profiles->{essential}}, qw( - DBD::SQLite - DBI - Search::Xapian - xapian-compact -) ]; +$profiles->{v2essential} = [ @{$profiles->{essential}}, qw(DBD::SQLite DBI) ]; # package names which can't be mapped automatically: my $non_auto = { diff --git a/ci/profiles.sh b/ci/profiles.sh index 31140f35..5d7c2cf6 100755 --- a/ci/profiles.sh +++ b/ci/profiles.sh @@ -62,8 +62,9 @@ essential devtest- EOF ;; debian-sid|debian-9|debian-10) sed "s/^/$PKG_FMT /" <{$f} if SvREFCNT($ibx->{$f}) == 1; } my $expire = time - 60; @@ -37,7 +37,7 @@ sub cleanup_task () { $again = 1 if $git->cleanup($expire); } } - $again ||= !!($ibx->{mm} || $ibx->{search}); + $again ||= !!($ibx->{over} || $ibx->{mm} || $ibx->{search}); $next->{"$ibx"} = $ibx if $again; } $CLEANUP = $next; @@ -175,14 +175,17 @@ sub search ($;$) { require PublicInbox::Search; PublicInbox::Search->new($self, $self->{altid}); }; - # TODO: lazily load Xapian - # return $srch if $over_only || eval { $srch->xdb }; - # undef; + ($over_only || eval { $srch->xdb }) ? 
$srch : undef; } sub over ($) { - my $srch = search($_[0], 1) or return; - $srch->{over_ro}; + my ($self) = @_; + my $srch = search($self, 1) or return; + $self->{over} ||= eval { + my $over = $srch->{over_ro}; + $over->dbh_new; # may fail + $over; + } } sub try_cat { @@ -290,7 +293,7 @@ sub nntp_url { sub nntp_usable { my ($self) = @_; my $ret = mm($self) && over($self); - $self->{mm} = $self->{search} = undef; + $self->{mm} = $self->{over} = $self->{search} = undef; $ret; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index e79ec0f8..b1e62f4c 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -12,11 +12,21 @@ use constant TS => 0; # Received: header in Unix time use constant YYYYMMDD => 1; # Date: header for searching in the WWW UI use constant DT => 2; # Date: YYYYMMDDHHMMSS -use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use PublicInbox::MIME; use PublicInbox::MID qw/id_compress/; use PublicInbox::Over; +my $QP_FLAGS; +sub load_xapian () { + $QP_FLAGS ||= eval { + require Search::Xapian; + Search::Xapian->import(qw(:standard)); + + # n.b. FLAG_PURE_NOT is expensive not suitable for a public + # website as it could become a denial-of-service vector + FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD(); + }; +}; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -41,10 +51,6 @@ use constant { # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) # 14 - fix ghost root vivification SCHEMA_VERSION => 15, - - # n.b. FLAG_PURE_NOT is expensive not suitable for a public website - # as it could become a denial-of-service vector - QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; my %bool_pfx_external = ( @@ -113,18 +119,43 @@ EOF ); chomp @HELP; -sub xdir { - my ($self) = @_; +sub xdir ($;$) { + my ($self, $rdonly) = @_; if ($self->{version} == 1) { "$self->{mainrepo}/public-inbox/xapian" . 
SCHEMA_VERSION; } else { my $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION; + return $dir if $rdonly; + my $part = $self->{partition}; defined $part or die "partition not given"; $dir .= "/$part"; } } +sub xdb ($) { + my ($self) = @_; + $self->{xdb} ||= do { + load_xapian(); + my $dir = xdir($self, 1); + if ($self->{version} >= 2) { + my $xdb; + foreach my $part (<$dir/*>) { + -d $part && $part =~ m!/\d+\z! or next; + my $sub = Search::Xapian::Database->new($part); + if ($xdb) { + $xdb->add_database($sub); + } else { + $xdb = $sub; + } + } + $xdb; + } else { + Search::Xapian::Database->new($dir); + } + }; +} + sub new { my ($class, $mainrepo, $altid) = @_; my $version = 1; @@ -138,33 +169,16 @@ sub new { altid => $altid, version => $version, }, $class; - my $dir; - if ($version >= 2) { - $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION; - my $xdb; - my $parts = 0; - foreach my $part (<$dir/*>) { - -d $part && $part =~ m!/\d+\z! or next; - $parts++; - my $sub = Search::Xapian::Database->new($part); - if ($xdb) { - $xdb->add_database($sub); - } else { - $xdb = $sub; - } - } - $self->{xdb} = $xdb; - } else { - $dir = $self->xdir; - $self->{xdb} = Search::Xapian::Database->new($dir); - } + my $dir = xdir($self, 1); $self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3"); $self; } sub reopen { my ($self) = @_; - $self->{xdb}->reopen; + if (my $xdb = $self->{xdb}) { + $xdb->reopen; + } $self; # make chaining easier } @@ -175,7 +189,8 @@ sub query { if ($query_string eq '' && !$opts->{mset}) { $self->{over_ro}->recent($opts); } else { - my $query = $self->qp->parse_query($query_string, QP_FLAGS); + my $qp = qp($self); + my $query = $qp->parse_query($query_string, $QP_FLAGS); $opts->{relevance} = 1 unless exists $opts->{relevance}; _do_enquire($self, $query, $opts); } @@ -213,7 +228,8 @@ sub _do_enquire { sub _enquire_once { my ($self, $query, $opts) = @_; - my $enquire = Search::Xapian::Enquire->new($self->{xdb}); + my $xdb = xdb($self); + my $enquire = 
Search::Xapian::Enquire->new($xdb); $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; @@ -246,13 +262,13 @@ sub qp { my $qp = $self->{query_parser}; return $qp if $qp; - + my $xdb = xdb($self); # new parser $qp = Search::Xapian::QueryParser->new; - $qp->set_default_op(OP_AND); - $qp->set_database($self->{xdb}); + $qp->set_default_op(OP_AND()); + $qp->set_database($xdb); $qp->set_stemmer($self->stemmer); - $qp->set_stemming_strategy(STEM_SOME); + $qp->set_stemming_strategy(STEM_SOME()); $qp->set_max_wildcard_expansion(100); $qp->add_valuerangeprocessor( Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:')); diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1b86f727..135b5eb9 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -54,11 +54,10 @@ sub new { die("Invalid indexlevel $ibx->{indexlevel}\n"); } } - } else { # v1 + } else { # FIXME: old tests: old tests $ibx = { mainrepo => $git_dir, version => 1 }; } $ibx = PublicInbox::InboxWritable->new($ibx); - require Search::Xapian::WritableDatabase; my $self = bless { mainrepo => $mainrepo, -inbox => $ibx, @@ -84,25 +83,36 @@ sub new { $self; } +sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels } + sub _xdb_release { my ($self) = @_; - my $xdb = delete $self->{xdb} or croak 'not acquired'; - $xdb->close; + if (need_xapian($self)) { + my $xdb = delete $self->{xdb} or croak 'not acquired'; + $xdb->close; + } $self->lock_release if $self->{creat}; undef; } sub _xdb_acquire { my ($self) = @_; - croak 'already acquired' if $self->{xdb}; + my $flag; my $dir = $self->xdir; - my $flag = Search::Xapian::DB_OPEN; + if (need_xapian($self)) { + croak 'already acquired' if $self->{xdb}; + PublicInbox::Search::load_xapian(); + require Search::Xapian::WritableDatabase; + $flag = $self->{creat} ? 
+ Search::Xapian::DB_CREATE_OR_OPEN() : + Search::Xapian::DB_OPEN(); + } if ($self->{creat}) { require File::Path; $self->lock_acquire; File::Path::mkpath($dir); - $flag = Search::Xapian::DB_CREATE_OR_OPEN; } + return unless defined $flag; $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag); } @@ -342,7 +352,7 @@ sub add_message { $num = index_mm($self, $mime); } eval { - if ($self->{indexlevel} =~ $xapianlevels) { + if (need_xapian($self)) { $self->add_xapian($mime, $num, $oid, $mids, $mid0) } if (my $over = $self->{over}) { @@ -383,7 +393,6 @@ sub batch_do { # v1 only, where $mid is unique sub remove_message { my ($self, $mid) = @_; - my $db = $self->{xdb}; $mid = mid_clean($mid); if (my $over = $self->{over}) { @@ -394,7 +403,8 @@ sub remove_message { warn "<$mid> missing for removal from overview\n"; } } - return if $self->{indexlevel} !~ $xapianlevels; + return unless need_xapian($self); + my $db = $self->{xdb}; my $nr = 0; eval { batch_do($self, 'Q' . $mid, sub { @@ -413,10 +423,12 @@ sub remove_message { # MID is a hint in V2 sub remove_by_oid { my ($self, $oid, $mid) = @_; - my $db = $self->{xdb}; $self->{over}->remove_oid($oid, $mid) if $self->{over}; + return unless need_xapian($self); + my $db = $self->{xdb}; + # XXX careful, we cannot use batch_do here since we conditionally # delete documents based on other factors, so we cannot call # find_doc_ids twice. 
@@ -664,7 +676,7 @@ sub _last_x_commit { my ($self, $mm) = @_; my $lm = $mm->last_commit || ''; my $lx = ''; - if ($self->{indexlevel} =~ $xapianlevels) { + if (need_xapian($self)) { $lx = $self->{xdb}->get_metadata('last_commit') || ''; } else { $lx = $lm; @@ -695,7 +707,7 @@ sub _index_sync { $self->{over}->disconnect; $git->cleanup; delete $self->{txn}; - $xdb->cancel_transaction; + $xdb->cancel_transaction if $xdb; $xdb = _xdb_release($self); # ensure we leak no FDs to "git log" with Xapian <= 1.2 @@ -717,7 +729,7 @@ sub _index_sync { } $dbh->commit; } - if ($newest && $self->{indexlevel} =~ $xapianlevels) { + if ($newest && need_xapian($self)) { my $cur = $xdb->get_metadata('last_commit'); if (need_update($self, $cur, $newest)) { $xdb->set_metadata('last_commit', $newest); @@ -785,7 +797,7 @@ sub begin_txn_lazy { $self->{-inbox}->with_umask(sub { my $xdb = $self->{xdb} || $self->_xdb_acquire; $self->{over}->begin_lazy if $self->{over}; - $xdb->begin_transaction; + $xdb->begin_transaction if $xdb; $self->{txn} = 1; $xdb; }); @@ -795,14 +807,18 @@ sub commit_txn_lazy { my ($self) = @_; delete $self->{txn} or return; $self->{-inbox}->with_umask(sub { - $self->{xdb}->commit_transaction; + if (my $xdb = $self->{xdb}) { + $xdb->commit_transaction; + } $self->{over}->commit_lazy if $self->{over}; }); } sub worker_done { my ($self) = @_; - die "$$ $0 xdb not released\n" if $self->{xdb}; + if (need_xapian($self)) { + die "$$ $0 xdb not released\n" if $self->{xdb}; + } die "$$ $0 still in transaction\n" if $self->{txn}; } diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 0f963dcb..8e1b1afe 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -149,8 +149,11 @@ sub preload { require PublicInbox::MIME; require Digest::SHA; require POSIX; - - foreach (qw(PublicInbox::Search PublicInbox::SearchView + eval { + require PublicInbox::Search; + PublicInbox::Search::load_xapian(); + }; + foreach (qw(PublicInbox::SearchView PublicInbox::Mbox 
IO::Compress::Gzip PublicInbox::NewsWWW)) { eval "require $_;"; diff --git a/script/public-inbox-index b/script/public-inbox-index index b353093e..53def9a7 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -26,10 +26,12 @@ if ($@) { my $reindex; my $prune; my $jobs = undef; +my $indexlevel; my %opts = ( '--reindex' => \$reindex, '--jobs|j=i' => \$jobs, '--prune' => \$prune, + 'L|indexlevel=s' => \$indexlevel, ); GetOptions(%opts) or die "bad command-line args\n$usage"; die "--jobs must be positive\n" if defined $jobs && $jobs < 0; @@ -55,18 +57,27 @@ defined($config) and $config->each_inbox(sub { }); foreach my $dir (@dirs) { - if (!ref($dir) && -f "$dir/inbox.lock") { # v2 - my $ibx = { mainrepo => $dir, name => 'unnamed' }; + if (!ref($dir)) { + unless (-d $dir) { + die "$dir does not appear to be an inbox repository\n"; + } + my $ibx = { + mainrepo => $dir, + name => 'unnamed', + indexlevel => $indexlevel, + version => -f "$dir/inbox.lock" ? 2 : 1, + }; $dir = PublicInbox::Inbox->new($ibx); + } elsif (defined $indexlevel && !defined($dir->{indexlevel})) { + # XXX: users can shoot themselves in the foot, with this... + $dir->{indexlevel} = $indexlevel; } - index_dir($dir); + + index_inbox($dir); } -sub index_dir { +sub index_inbox { my ($repo) = @_; - if (!ref $repo && ! 
-d $repo) { - die "$repo does not appear to be an inbox repository\n"; - } if (ref($repo) && ($repo->{version} || 1) == 2) { eval { require PublicInbox::V2Writable }; die "v2 requirements not met: $@\n" if $@; diff --git a/script/public-inbox-init b/script/public-inbox-init index 8bb78451..9f0bd1bd 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -109,6 +109,7 @@ my $ibx = PublicInbox::Inbox->new({ name => $name, version => $version, -primary_address => $address[0], + indexlevel => $indexlevel, }); if ($version >= 2) { diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 688dd950..264bcdef 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -63,6 +63,11 @@ if ($all) { mainrepo => $dir, }); }; + + # somebody could "rm -r" all the Xapian directories; + # let them purge the overview, at least + $ibx->{indexlevel} ||= 'basic' unless $ibx->search; + push @inboxes, $ibx; } diff --git a/t/admin.t b/t/admin.t index b5a4383e..3790c9e2 100644 --- a/t/admin.t +++ b/t/admin.t @@ -50,7 +50,7 @@ SKIP: { # v2 SKIP: { - for my $m (qw(DBD::SQLite Search::Xapian)) { + for my $m (qw(DBD::SQLite)) { skip "$m missing", 5 unless eval "require $m"; } use_ok 'PublicInbox::V2Writable'; diff --git a/t/cgi.t b/t/cgi.t index b24bbc4a..d3172bf7 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -30,6 +30,7 @@ my $cfgpfx = "publicinbox.test"; my %cfg = ( "$cfgpfx.address" => $addr, "$cfgpfx.mainrepo" => $maindir, + "$cfgpfx.indexlevel" => 'basic', ); while (my ($k,$v) = each %cfg) { is(0, system(qw(git config --file), $pi_config, $k, $v), @@ -39,9 +40,12 @@ my $cfgpfx = "publicinbox.test"; use_ok 'PublicInbox::Git'; use_ok 'PublicInbox::Import'; -use_ok 'Email::MIME'; -my $git = PublicInbox::Git->new($maindir); -my $im = PublicInbox::Import->new($git, 'test', $addr); +use_ok 'PublicInbox::Inbox'; +use_ok 'PublicInbox::V1Writable'; +use_ok 'PublicInbox::Config'; +my $cfg = PublicInbox::Config->new($pi_config); +my $ibx = 
$cfg->lookup_name('test'); +my $im = PublicInbox::V1Writable->new($ibx); { local $ENV{HOME} = $home; @@ -103,8 +107,9 @@ EOF like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled"); my $indexed; eval { + require DBD::SQLite; require PublicInbox::SearchIdx; - my $s = PublicInbox::SearchIdx->new($maindir, 1); + my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync; $indexed = 1; }; @@ -120,6 +125,7 @@ EOF }; } else { like($res->{head}, qr/^Status: 501 /, "search not available"); + SKIP: { skip 'DBD::SQLite not available', 2 }; } my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed; @@ -132,6 +138,8 @@ EOF my $p = XML::Feed->parse(\($res->{body})); is($p->format, "Atom", "parsed atom feed"); is(scalar $p->entries, 3, "parsed three entries"); + } else { + SKIP: { skip 'DBD::SQLite or XML::Feed missing', 2 }; } } diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index e25b827f..cac7050b 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -10,8 +10,7 @@ require './t/common.perl'; require_git(2.6); my $this = (split('/', __FILE__))[-1]; -# TODO: remove Search::Xapian as a requirement for basic -foreach my $mod (qw(DBD::SQLite Search::Xapian)) { +foreach my $mod (qw(DBD::SQLite)) { eval "require $mod"; plan skip_all => "$mod missing for $this" if $@; } @@ -47,8 +46,11 @@ sub import_index_incremental { $im->done; # index master (required for v1) - is(system($index, $ibx->{mainrepo}), 0, 'index master OK'); - my $ro_master = PublicInbox::Inbox->new({mainrepo => $ibx->{mainrepo}}); + is(system($index, $ibx->{mainrepo}, "-L$level"), 0, 'index master OK'); + my $ro_master = PublicInbox::Inbox->new({ + mainrepo => $ibx->{mainrepo}, + indexlevel => $level + }); my ($nr, $msgs) = $ro_master->recent; is($nr, 1, 'only one message in master, so far'); is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); @@ -75,7 +77,10 @@ sub import_index_incremental { is(system($index, $mirror), 0, "v$v index mirror OK"); # read-only 
access - my $ro_mirror = PublicInbox::Inbox->new({mainrepo => $mirror}); + my $ro_mirror = PublicInbox::Inbox->new({ + mainrepo => $mirror, + indexlevel => 'basic' + }); ($nr, $msgs) = $ro_mirror->recent; is($nr, 1, 'only one message, so far'); is($msgs->[0]->{mid}, 'm@1', 'read first message'); @@ -94,7 +99,7 @@ sub import_index_incremental { ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) - is(system($index, $ibx->{mainrepo}), 0, 'index master OK'); + is(system($index, $ibx->{mainrepo}, "-L$level"), 0, 'index master OK'); ($nr, $msgs) = $ro_master->recent; is($nr, 2, '2nd message seen in master'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], diff --git a/t/nntp.t b/t/nntp.t index c4b9737c..7ab801c1 100644 --- a/t/nntp.t +++ b/t/nntp.t @@ -4,7 +4,7 @@ use strict; use warnings; use Test::More; -foreach my $mod (qw(DBD::SQLite Search::Xapian Data::Dumper)) { +foreach my $mod (qw(DBD::SQLite Data::Dumper)) { eval "require $mod"; plan skip_all => "$mod missing for nntp.t" if $@; } diff --git a/t/nntpd.t b/t/nntpd.t index b8a84a0c..c7ea3197 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -3,7 +3,7 @@ use strict; use warnings; use Test::More; -foreach my $mod (qw(DBD::SQLite Search::Xapian)) { +foreach my $mod (qw(DBD::SQLite)) { eval "require $mod"; plan skip_all => "$mod missing for nntpd.t" if $@; } @@ -55,12 +55,13 @@ my $ibx = { name => $group, version => $version, -primary_address => $addr, + indexlevel => 'basic', }; $ibx = PublicInbox::Inbox->new($ibx); { local $ENV{HOME} = $home; my @cmd = ($init, $group, $mainrepo, 'http://example.com/', $addr); - push @cmd, "-V$version"; + push @cmd, "-V$version", '-Lbasic'; is(system(@cmd), 0, 'init OK'); is(system(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$group.newsgroup", $group), @@ -71,8 +72,8 @@ $ibx = PublicInbox::Inbox->new($ibx); if ($version == 2) { $im = PublicInbox::V2Writable->new($ibx); } elsif ($version == 1) { - my $git = 
PublicInbox::Git->new($mainrepo); - $im = PublicInbox::Import->new($git, 'test', $addr); + use_ok 'PublicInbox::V1Writable'; + $im = PublicInbox::V1Writable->new($ibx); } else { die "unsupported version: $version"; } @@ -261,6 +262,12 @@ EOF is($rdr, waitpid($rdr, 0), 'reader done'); is($? >> 8, 0, 'no errors'); } + SKIP: { + my @of = `lsof -p $pid 2>/dev/null`; + skip('lsof broken', 1) if (!scalar(@of) || $?); + my @xap = grep m!Search/Xapian!, @of; + is_deeply(\@xap, [], 'Xapian not loaded in nntpd'); + } { setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); syswrite($s, 'HDR List-id 1-'); diff --git a/t/over.t b/t/over.t index 7a3c9721..c0d9d5e5 100644 --- a/t/over.t +++ b/t/over.t @@ -5,8 +5,7 @@ use warnings; use Test::More; use File::Temp qw/tempdir/; use Compress::Zlib qw(compress); -# FIXME: allow using Over w/o Xapian -foreach my $mod (qw(DBD::SQLite Search::Xapian)) { +foreach my $mod (qw(DBD::SQLite)) { eval "require $mod"; plan skip_all => "$mod missing for over.t" if $@; } diff --git a/t/psgi_bad_mids.t b/t/psgi_bad_mids.t index 5008f5be..71eefe59 100644 --- a/t/psgi_bad_mids.t +++ b/t/psgi_bad_mids.t @@ -7,7 +7,7 @@ use File::Temp qw/tempdir/; use PublicInbox::MIME; use PublicInbox::Config; use PublicInbox::WWW; -my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test +my @mods = qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); foreach my $mod (@mods) { eval "require $mod"; @@ -22,6 +22,7 @@ my $ibx = { name => 'bad-mids', version => 2, -primary_address => 'test@example.com', + indexlevel => 'basic', }; $ibx = PublicInbox::Inbox->new($ibx); my $im = PublicInbox::V2Writable->new($ibx, 1); diff --git a/t/psgi_scan_all.t b/t/psgi_scan_all.t index e9c439ec..2f54c820 100644 --- a/t/psgi_scan_all.t +++ b/t/psgi_scan_all.t @@ -6,8 +6,7 @@ use Test::More; use Email::MIME; use File::Temp qw/tempdir/; use PublicInbox::Config; -my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Search::Xapian - DBD::SQLite); +my 
@mods = qw(HTTP::Request::Common Plack::Test URI::Escape DBD::SQLite); foreach my $mod (@mods) { eval "require $mod"; plan skip_all => "$mod missing for psgi_scan_all.t" if $@; @@ -26,6 +25,7 @@ foreach my $i (1..2) { mainrepo => $mainrepo, name => "test-$i", version => 2, + indexlevel => 'basic', -primary_address => $addr, }; my $ibx = PublicInbox::Inbox->new($opt); diff --git a/t/psgi_search.t b/t/psgi_search.t index da6cc682..1adc1d6e 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -8,13 +8,13 @@ use Email::MIME; use PublicInbox::Config; use PublicInbox::WWW; use bytes (); # only for bytes::length -my @mods = qw(PublicInbox::SearchIdx HTTP::Request::Common Plack::Test +my @mods = qw(Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); foreach my $mod (@mods) { eval "require $mod"; plan skip_all => "$mod missing for psgi_search.t" if $@; } -use_ok $_ foreach @mods; +use_ok $_ foreach (@mods, qw(PublicInbox::SearchIdx)); my $tmpdir = tempdir('pi-psgi-search.XXXXXX', TMPDIR => 1, CLEANUP => 1); my $git_dir = "$tmpdir/a.git"; diff --git a/t/purge.t b/t/purge.t index 574935e0..c1e0e9a0 100644 --- a/t/purge.t +++ b/t/purge.t @@ -6,7 +6,7 @@ use Test::More; use File::Temp qw/tempdir/; require './t/common.perl'; require_git(2.6); -my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian); +my @mods = qw(IPC::Run DBI DBD::SQLite); foreach my $mod (@mods) { eval "require $mod"; plan skip_all => "missing $mod for t/purge.t" if $@; diff --git a/t/search-thr-index.t b/t/search-thr-index.t index ed3e4e76..848dc564 100644 --- a/t/search-thr-index.t +++ b/t/search-thr-index.t @@ -7,8 +7,9 @@ use Test::More; use File::Temp qw/tempdir/; use PublicInbox::MID qw(mids); use Email::MIME; -eval { require PublicInbox::SearchIdx; }; -plan skip_all => "Xapian missing for search" if $@; +eval { require Search::Xapian }; +plan skip_all => "Search::Xapian missing for search" if $@; +require PublicInbox::SearchIdx; my $tmpdir = 
tempdir('pi-search-thr-index.XXXXXX', TMPDIR => 1, CLEANUP => 1); my $git_dir = "$tmpdir/a.git"; diff --git a/t/search.t b/t/search.t index 493e00de..a9d0c920 100644 --- a/t/search.t +++ b/t/search.t @@ -3,8 +3,9 @@ use strict; use warnings; use Test::More; -eval { require PublicInbox::SearchIdx; }; -plan skip_all => "Xapian missing for search" if $@; +eval { require Search::Xapian }; +plan skip_all => "Search::Xapian missing for search" if $@; +require PublicInbox::SearchIdx; use File::Temp qw/tempdir/; use Email::MIME; my $tmpdir = tempdir('pi-search-XXXXXX', TMPDIR => 1, CLEANUP => 1); @@ -12,7 +13,7 @@ my $git_dir = "$tmpdir/a.git"; my ($root_id, $last_id); is(0, system(qw(git init --shared -q --bare), $git_dir), "git init (main)"); -eval { PublicInbox::Search->new($git_dir) }; +eval { PublicInbox::Search->new($git_dir)->xdb }; ok($@, "exception raised on non-existent DB"); my $rw = PublicInbox::SearchIdx->new($git_dir, 1); diff --git a/t/v1reindex.t b/t/v1reindex.t index 33a36fad..402ecd78 100644 --- a/t/v1reindex.t +++ b/t/v1reindex.t @@ -209,8 +209,7 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); - my $mset = $ibx->search->reopen->query('hello world', {mset=>1}); - is($mset->size, 0, "no Xapian search results"); + isnt($ibx->search, 'no search for basic'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); diff --git a/t/v2reindex.t b/t/v2reindex.t index c416629c..2a6fc555 100644 --- a/t/v2reindex.t +++ b/t/v2reindex.t @@ -225,8 +225,9 @@ ok(!-d $xap, 'Xapian directories removed again'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); - my $mset = $ibx->search->query('freedom', {mset=>1}); - is($mset->size, 0, "search fails on indexlevel='basic'"); + + 
isnt($ibx->search, 'no search for basic'); + for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium'); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index 5f968919..b2cff4ba 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -7,12 +7,12 @@ use Cwd; use PublicInbox::Config; require './t/common.perl'; require_git(2.6); -my @mods = qw(Filesys::Notify::Simple PublicInbox::V2Writable); +my @mods = qw(Search::Xapian DBD::SQLite Filesys::Notify::Simple); foreach my $mod (@mods) { eval "require $mod"; plan skip_all => "$mod missing for watch_maildir_v2.t" if $@; } - +require PublicInbox::V2Writable; my $tmpdir = tempdir('watch_maildir-v2-XXXXXX', TMPDIR => 1, CLEANUP => 1); my $mainrepo = "$tmpdir/v2"; my $maildir = "$tmpdir/md"; -- cgit v1.2.3-24-ge0c7 From b5c64fc01d3e7ca6243fdd8b811b6291bdb12235 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:10 +0000 Subject: searchidx: do not create empty Xapian partitions for basic No point in leaving a mess of empty directories when Xapian doesn't load. 
--- lib/PublicInbox/SearchIdx.pm | 7 ++++++- t/indexlevels-mirror.t | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 135b5eb9..f96f0d03 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -110,7 +110,12 @@ sub _xdb_acquire { if ($self->{creat}) { require File::Path; $self->lock_acquire; - File::Path::mkpath($dir); + + # don't create empty Xapian directories if we don't need Xapian + my $is_part = defined($self->{partition}); + if (!$is_part || ($is_part && need_xapian($self))) { + File::Path::mkpath($dir); + } } return unless defined $flag; $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag); diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index cac7050b..3dd43238 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -116,6 +116,11 @@ sub import_index_incremental { is($nr, 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], 'message unavailable in mirror'); + + if ($v == 2 && $level eq 'basic') { + is_deeply([glob("$ibx->{mainrepo}/xap*/?/")], [], + 'no Xapian partition directories for v2 basic'); + } } # we can probably cull some other tests and put full/medium tests, here -- cgit v1.2.3-24-ge0c7 From d0e8bfd866ed1e924e8d9f551939eecbea4920ef Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 May 2019 01:18:09 +0000 Subject: admin: improve warnings and errors for missing modules Since we lazy-load Xapian now, some errors may become more cryptic or buried. Try to improve that by making Admin show better errors. 
--- lib/PublicInbox/Admin.pm | 60 +++++++++++++++++++++++++++++++++++++++++++++++ script/public-inbox-index | 32 ++++++++++++++----------- script/public-inbox-init | 7 ++++-- script/public-inbox-purge | 53 +++++++++++++++++++++++++++++++---------- 4 files changed, 124 insertions(+), 28 deletions(-) diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index d0a8dd00..3eff5cde 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -41,4 +41,64 @@ sub resolve_repo_dir { } } +# TODO: make Devel::Peek optional, only used for daemon +my @base_mod = qw(Email::MIME Date::Parse Devel::Peek); +my @over_mod = qw(DBD::SQLite DBI); +my %mod_groups = ( + -index => [ @base_mod, @over_mod ], + -base => \@base_mod, + -search => [ @base_mod, @over_mod, 'Search::Xapian' ], +); + +sub scan_ibx_modules ($$) { + my ($mods, $ibx) = @_; + if (!$ibx->{indexlevel} || $ibx->{indexlevel} ne 'basic') { + $mods->{'Search::Xapian'} = 1; + } else { + $mods->{$_} = 1 foreach @over_mod; + } +} + +sub check_require { + my (@mods) = @_; + my $err = {}; + while (my $mod = shift @mods) { + if (my $groups = $mod_groups{$mod}) { + push @mods, @$groups; + } else { + eval "require $mod"; + $err->{$mod} = $@ if $@; + } + } + scalar keys %$err ? $err : undef; +} + +sub missing_mod_msg { + my ($err) = @_; + my @mods = map { "`$_'" } sort keys %$err; + my $last = pop @mods; + @mods ? (join(', ', @mods)."' and $last") : $last +} + +sub require_or_die { + my $err = check_require(@_) or return; + die missing_mod_msg($err)." required for $0\n"; +} + +sub indexlevel_ok_or_die ($) { + my ($indexlevel) = @_; + my $req; + if ($indexlevel eq 'basic') { + $req = '-index'; + } elsif ($indexlevel =~ /\A(?:medium|full)\z/) { + $req = '-search'; + } else { + die <<""; +invalid indexlevel=$indexlevel (must be `basic', `medium', or `full') + + } + my $err = check_require($req) or return; + die missing_mod_msg($err) ." 
required for indexlevel=$indexlevel\n"; +} + 1; diff --git a/script/public-inbox-index b/script/public-inbox-index index 53def9a7..cf001cc1 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -10,18 +10,14 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $usage = "public-inbox-index REPO_DIR"; -use PublicInbox::Config; use PublicInbox::Admin qw(resolve_repo_dir); +PublicInbox::Admin::require_or_die('-index'); +require PublicInbox::Config; my $config = eval { PublicInbox::Config->new } || eval { warn "public-inbox unconfigured for serving, indexing anyways...\n"; undef; }; -eval { require PublicInbox::SearchIdx }; -if ($@) { - print STDERR "Search::Xapian required for $0\n"; - exit 1; -} my $reindex; my $prune; @@ -56,26 +52,34 @@ defined($config) and $config->each_inbox(sub { } }); +my @inboxes; +my $mods = {}; + foreach my $dir (@dirs) { - if (!ref($dir)) { + my $ibx = $dir; + if (!ref($ibx)) { unless (-d $dir) { die "$dir does not appear to be an inbox repository\n"; } - my $ibx = { + $ibx = PublicInbox::Inbox->new({ mainrepo => $dir, name => 'unnamed', indexlevel => $indexlevel, version => -f "$dir/inbox.lock" ? 2 : 1, - }; - $dir = PublicInbox::Inbox->new($ibx); - } elsif (defined $indexlevel && !defined($dir->{indexlevel})) { + }); + } elsif (defined $indexlevel && !defined($ibx->{indexlevel})) { # XXX: users can shoot themselves in the foot, with this... 
- $dir->{indexlevel} = $indexlevel; + $ibx->{indexlevel} = $indexlevel; } - - index_inbox($dir); + push @inboxes, $ibx; + PublicInbox::Admin::scan_ibx_modules($mods, $ibx); } +PublicInbox::Admin::require_or_die(keys %$mods); + +require PublicInbox::SearchIdx; +index_inbox($_) for @inboxes; + sub index_inbox { my ($repo) = @_; if (ref($repo) && ($repo->{version} || 1) == 2) { diff --git a/script/public-inbox-init b/script/public-inbox-init index 9f0bd1bd..5516e798 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -7,8 +7,10 @@ use strict; use warnings; my $usage = "public-inbox-init NAME REPO_DIR HTTP_URL ADDRESS [ADDRESS..]"; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; -use PublicInbox::Config; -use PublicInbox::Inbox; +use PublicInbox::Admin; +PublicInbox::Admin::require_or_die('-base'); +require PublicInbox::Config; +require PublicInbox::Inbox; use File::Temp qw/tempfile/; use File::Basename qw/dirname/; use File::Path qw/mkpath/; @@ -24,6 +26,7 @@ my %opts = ( 'V|version=i' => \$version, 'S|skip=i' => \$skip, ); GetOptions(%opts) or usage(); +PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; my $name = shift @ARGV or usage(); my $mainrepo = shift @ARGV or usage(); my $http_url = shift @ARGV or usage(); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 264bcdef..381826dc 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -7,16 +7,16 @@ use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::Config; -use PublicInbox::MIME; use PublicInbox::Admin qw(resolve_repo_dir); -use PublicInbox::Filter::Base; -*REJECT = *PublicInbox::Filter::Base::REJECT; +PublicInbox::Admin::check_require('-index'); +require PublicInbox::Filter::Base; +require PublicInbox::Config; +require PublicInbox::MIME; +require PublicInbox::V2Writable; -my $usage = "$0 [--all] [INBOX_DIRS] new }; my $cfgfile = 
PublicInbox::Config::default_file(); my ($all, $force); @@ -63,11 +63,6 @@ if ($all) { mainrepo => $dir, }); }; - - # somebody could "rm -r" all the Xapian directories; - # let them purge the overview, at least - $ibx->{indexlevel} ||= 'basic' unless $ibx->search; - push @inboxes, $ibx; } @@ -77,6 +72,40 @@ if ($all) { } } +foreach my $ibx (@inboxes) { + my $lvl = $ibx->{indexlevel}; + if (defined $lvl) { + PublicInbox::Admin::indexlevel_ok_or_die($lvl); + next; + } + + # Undefined indexlevel, so `full'... + # Search::Xapian exists and the DB can be read, at least, fine + $ibx->search and next; + + # it's possible for a Xapian directory to exist, but Search::Xapian + # to go missing/broken. Make sure it's purged in that case: + $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n"; + + # $ibx->{search} is populated by $ibx->over call + my $xdir_ro = $ibx->{search}->xdir(1); + my $npart = 0; + foreach my $part (<$xdir_ro/*>) { + if (-d $part && $part =~ m!/\d+\z!) { + my $bytes = 0; + $bytes += -s $_ foreach glob("$part/*"); + $npart++ if $bytes; + } + } + if ($npart) { + PublicInbox::Admin::require_or_die('-search'); + } else { + # somebody could "rm -r" all the Xapian directories; + # let them purge the overview, at least + $ibx->{indexlevel} ||= 'basic'; + } +} + my $data = do { local $/; scalar <STDIN> }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; -- cgit v1.2.3-24-ge0c7