From fad9acd35e56a289ade90a62d056b2a6663d448c Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Fri, 6 Apr 2018 21:44:38 +0000 Subject: www: favor reading more from SQLite, and less from Xapian Favor simpler internal APIs this time around, this cuts a fair amount of code out and takes another step towards removing Xapian as a dependency for v2 repos. --- lib/PublicInbox/Inbox.pm | 2 +- lib/PublicInbox/Mbox.pm | 39 +++++++++++++------------------- lib/PublicInbox/Over.pm | 29 ++++++++++++++++++++++++ lib/PublicInbox/Search.pm | 52 ++++--------------------------------------- lib/PublicInbox/SearchIdx.pm | 3 ++- lib/PublicInbox/V2Writable.pm | 34 ++++++++++++---------------- lib/PublicInbox/View.pm | 35 ++++++++++------------------- 7 files changed, 77 insertions(+), 117 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 0ea18b4c..f71493a0 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -273,7 +273,7 @@ sub msg_by_smsg ($$;$) { my ($self, $smsg, $ref) = @_; # ghosts may have undef smsg (from SearchThread.node) or - # no {blob} field (from each_smsg_by_mid) + # no {blob} field return unless defined $smsg; defined(my $blob = $smsg->{blob}) or return; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index c5e1cb9c..4427ae5d 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -34,19 +34,17 @@ sub mb_stream { # called by PSGI server as body response sub getline { my ($more) = @_; # self - my ($ctx, $head, $tail, $db, $cur) = @$more; - if ($cur) { + my ($ctx, $id, $prev, $next, $cur) = @$more; + if ($cur) { # first pop @$more; return msg_str($ctx, $cur); } - for (; !defined($cur) && $head != $tail; $head++) { - my $smsg = PublicInbox::SearchMsg->get($head, $db, $ctx->{mid}); - my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; - $cur = Email::Simple->new($mref); - $cur = msg_str($ctx, $cur); - } - $more->[1] = $head; - $cur; + $cur = $next or return; + my $ibx = $ctx->{-inbox}; + $next = $ibx->search->next_by_mid($ctx->{mid}, \$id, \$prev); + @$more = ($ctx, $id, $prev, $next); # $next may be undef, here + my $mref = $ibx->msg_by_smsg($cur) or return; + msg_str($ctx, Email::Simple->new($mref)); } sub close {} # noop @@ -57,21 +55,14 @@ sub emit_raw { my $ibx = $ctx->{-inbox}; my $first; my $more; - my ($head, $tail, $db); - my %seen; if (my $srch = $ibx->search) { - $srch->retry_reopen(sub { - ($head, $tail, $db) = $srch->each_smsg_by_mid($mid); - for (; !defined($first) && $head != $tail; $head++) { - my @args = ($head, $db, $mid); - my $smsg = PublicInbox::SearchMsg->get(@args); - my $mref = $ibx->msg_by_smsg($smsg) or next; - $first = Email::Simple->new($mref); - } - if ($head != $tail) { - $more = [ $ctx, $head, $tail, $db, $first ]; - } - }); + my ($id, $prev); + my $smsg = $srch->next_by_mid($mid, \$id, \$prev) or return; + my $mref = $ibx->msg_by_smsg($smsg) or return; + $first = Email::Simple->new($mref); + my $next = $srch->next_by_mid($mid, \$id, \$prev); + # $more is for ->getline + $more = [ $ctx, $id, $prev, $next, $first ] if $next; } else { my $mref = $ibx->msg_by_mid($mid) or return; $first = Email::Simple->new($mref); diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 0bd6008b..309e044a 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -147,4 +147,33 @@ SELECT * from OVER where num = ? LIMIT 1 undef; } +sub next_by_mid { + my ($self, $mid, $id, $prev) = @_; + my $dbh = $self->connect; + + unless (defined $$id) { + my $sth = $dbh->prepare_cached(<<'', undef, 1); + SELECT id FROM msgid WHERE mid = ? LIMIT 1 + + $sth->execute($mid); + $$id = $sth->fetchrow_array; + defined $$id or return; + } + my $sth = $dbh->prepare_cached(<<"", undef, 1); +SELECT num FROM id2num WHERE id = ? AND num > ? +ORDER BY num ASC LIMIT 1 + + $$prev ||= 0; + $sth->execute($$id, $$prev); + my $num = $sth->fetchrow_array or return; + $$prev = $num; + + $sth = $dbh->prepare_cached(<<"", undef, 1); +SELECT num,ts,ds,ddd FROM over WHERE num = ? LIMIT 1 + + $sth->execute($num); + my $smsg = $sth->fetchrow_hashref or return; + load_from_row($smsg); +} + 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 34ebd1a6..7175ddc5 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -304,58 +304,14 @@ sub query_ts { $self->{over_ro}->query_ts($ts, $prev); } -sub first_smsg_by_mid { - my ($self, $mid) = @_; - my $smsg; - retry_reopen($self, sub { - each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef }); - }); - $smsg; -} - sub lookup_article { my ($self, $num) = @_; - my $term = 'XNUM'.$num; - my $db = $self->{xdb}; - retry_reopen($self, sub { - my $head = $db->postlist_begin($term); - my $tail = $db->postlist_end($term); - return if $head->equal($tail); - my $doc_id = $head->get_docid; - return unless defined $doc_id; - $head->inc; - if ($head->nequal($tail)) { - warn "article #$num is not unique\n"; - } - # raises on error: - my $doc = $db->get_document($doc_id); - my $smsg = PublicInbox::SearchMsg->wrap($doc); - $smsg->{doc_id} = $doc_id; - $smsg->load_expand; - }); + $self->{over_ro}->get_art($num); } -sub each_smsg_by_mid { - my ($self, $mid, $cb) = @_; - # XXX retry_reopen isn't necessary for V2Writable, but the PSGI - # interface will need it... - my $db = $self->{xdb}; - my $term = 'Q' . $mid; - my $head = $db->postlist_begin($term); - my $tail = $db->postlist_end($term); - if ($head == $tail) { - $db->reopen; - $head = $db->postlist_begin($term); - $tail = $db->postlist_end($term); - } - return ($head, $tail, $db) if wantarray; - for (; $head->nequal($tail); $head->inc) { - my $doc_id = $head->get_docid; - my $doc = $db->get_document($doc_id); - my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); - $smsg->{doc_id} = $doc_id; - $cb->($smsg) or return; - } +sub next_by_mid { + my ($self, $mid, $id, $prev) = @_; + $self->{over_ro}->next_by_mid($mid, $id, $prev); } # normalize subjects so they are suitable as pathnames for URLs diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 42562631..3596972f 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -76,7 +76,8 @@ sub new { if ($version == 1) { $self->{lock_path} = "$mainrepo/ssoma.lock"; my $dir = $self->xdir; - $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); + $self->{over_ro} = $self->{over} = + PublicInbox::OverIdx->new("$dir/over.sqlite3"); } elsif ($version == 2) { defined $part or die "partition is required for v2\n"; # partition is a number diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index a8117aaa..877a4591 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -271,22 +271,20 @@ sub remove_internal { foreach my $mid (@$mids) { my %gone; - $srch->reopen->each_smsg_by_mid($mid, sub { - my ($smsg) = @_; - $smsg->load_expand; + my ($id, $prev); + while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) { my $msg = $ibx->msg_by_smsg($smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; - return 1; # continue + next; # continue } my $orig = $$msg; my $cur = PublicInbox::MIME->new($msg); if (content_id($cur) eq $cid) { $smsg->{mime} = $cur; - $gone{$smsg->num} = [ $smsg, \$orig ]; + $gone{$smsg->{num}} = [ $smsg, \$orig ]; } - 1; # continue - }); + } my $n = scalar keys %gone; next unless $n; if ($n > 1) { @@ -552,26 +550,23 @@ sub lookup_content { my $srch = $ibx->search->reopen; my $cid = content_id($mime); my $found; - $srch->each_smsg_by_mid($mid, sub { - my ($smsg) = @_; - $smsg->load_expand; + my ($id, $prev); + while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) { my $msg = $ibx->msg_by_smsg($smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; - return 1; # continue + next; } my $cur = PublicInbox::MIME->new($msg); if (content_id($cur) eq $cid) { $smsg->{mime} = $cur; $found = $smsg; - return 0; # break out of loop + last; } # XXX DEBUG_DIFF is experimental and may be removed diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF}; - - 1; # continue - }); + } $found; } @@ -770,15 +765,14 @@ sub unindex_oid { my $mime = PublicInbox::MIME->new($msgref); my $mids = mids($mime->header_obj); $mime = $msgref = undef; - + my $srch = $self->{-inbox}->search; foreach my $mid (@$mids) { my %gone; - $self->{-inbox}->search->reopen->each_smsg_by_mid($mid, sub { - my ($smsg) = @_; - $smsg->load_expand; + my ($id, $prev); + while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) { $gone{$smsg->num} = 1 if $oid eq $smsg->{blob}; 1; # continue - }); + } my $n = scalar keys %gone; next unless $n; if ($n > 1) { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index cbed9163..94058ed0 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -58,20 +58,16 @@ sub msg_page { my ($ctx) = @_; my $mid = $ctx->{mid}; my $ibx = $ctx->{-inbox}; - my ($first, $more, $head, $tail, $db); + my ($first, $more); my $smsg; if (my $srch = $ibx->search) { - $srch->retry_reopen(sub { - ($head, $tail, $db) = $srch->each_smsg_by_mid($mid); - for (; !defined($first) && $head != $tail; $head++) { - my @args = ($head, $db, $mid); - $smsg = PublicInbox::SearchMsg->get(@args); - $first = $ibx->msg_by_smsg($smsg); - } - if ($head != $tail) { - $more = [ $head, $tail, $db ]; - } - }); + my ($id, $prev); + $smsg = $srch->next_by_mid($mid, \$id, \$prev); + $first = $ibx->msg_by_smsg($smsg) if $smsg; + if ($first) { + my $next = $srch->next_by_mid($mid, \$id, \$prev); + $more = [ $id, $prev, $next ] if $next; + } return unless $first; } else { $first = $ibx->msg_by_mid($mid) or return; @@ -82,18 +78,11 @@ sub msg_page { sub msg_html_more { my ($ctx, $more, $nr) = @_; my $str = eval { - my $smsg; - my ($head, $tail, $db) = @$more; + my ($id, $prev, $smsg) = @$more; my $mid = $ctx->{mid}; - for (; !defined($smsg) && $head != $tail; $head++) { - my $m = PublicInbox::SearchMsg->get($head, $db, $mid); - $smsg = $ctx->{-inbox}->smsg_mime($m); - } - if ($head == $tail) { # done - @$more = (); - } else { - $more->[0] = $head; - } + $smsg = $ctx->{-inbox}->smsg_mime($smsg); + my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev); + @$more = $next ? ($id, $prev, $next) : (); if ($smsg) { my $mime = $smsg->{mime}; my $upfx = '../' . mid_escape($smsg->mid) . '/'; -- cgit v1.2.3-24-ge0c7