From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/4] www: favor reading more from SQLite, and less from Xapian
Date: Fri, 6 Apr 2018 21:44:38 +0000 [thread overview]
Message-ID: <20180406214439.29591-4-e@80x24.org> (raw)
In-Reply-To: <20180406214439.29591-1-e@80x24.org>
Favor simpler internal APIs this time around; this cuts
a fair amount of code out and takes another step towards
removing Xapian as a dependency for v2 repos.
---
lib/PublicInbox/Inbox.pm | 2 +-
lib/PublicInbox/Mbox.pm | 39 +++++++++++++-------------------
lib/PublicInbox/Over.pm | 29 ++++++++++++++++++++++++
lib/PublicInbox/Search.pm | 52 ++++---------------------------------------
lib/PublicInbox/SearchIdx.pm | 3 ++-
lib/PublicInbox/V2Writable.pm | 34 ++++++++++++----------------
lib/PublicInbox/View.pm | 35 ++++++++++-------------------
t/psgi_v2.t | 1 +
t/search.t | 16 +++++--------
t/v2writable.t | 18 +++++++--------
10 files changed, 92 insertions(+), 137 deletions(-)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 0ea18b4..f71493a 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -273,7 +273,7 @@ sub msg_by_smsg ($$;$) {
my ($self, $smsg, $ref) = @_;
# ghosts may have undef smsg (from SearchThread.node) or
- # no {blob} field (from each_smsg_by_mid)
+ # no {blob} field
return unless defined $smsg;
defined(my $blob = $smsg->{blob}) or return;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index c5e1cb9..4427ae5 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -34,19 +34,17 @@ sub mb_stream {
# called by PSGI server as body response
sub getline {
my ($more) = @_; # self
- my ($ctx, $head, $tail, $db, $cur) = @$more;
- if ($cur) {
+ my ($ctx, $id, $prev, $next, $cur) = @$more;
+ if ($cur) { # first
pop @$more;
return msg_str($ctx, $cur);
}
- for (; !defined($cur) && $head != $tail; $head++) {
- my $smsg = PublicInbox::SearchMsg->get($head, $db, $ctx->{mid});
- my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
- $cur = Email::Simple->new($mref);
- $cur = msg_str($ctx, $cur);
- }
- $more->[1] = $head;
- $cur;
+ $cur = $next or return;
+ my $ibx = $ctx->{-inbox};
+ $next = $ibx->search->next_by_mid($ctx->{mid}, \$id, \$prev);
+ @$more = ($ctx, $id, $prev, $next); # $next may be undef, here
+ my $mref = $ibx->msg_by_smsg($cur) or return;
+ msg_str($ctx, Email::Simple->new($mref));
}
sub close {} # noop
@@ -57,21 +55,14 @@ sub emit_raw {
my $ibx = $ctx->{-inbox};
my $first;
my $more;
- my ($head, $tail, $db);
- my %seen;
if (my $srch = $ibx->search) {
- $srch->retry_reopen(sub {
- ($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
- for (; !defined($first) && $head != $tail; $head++) {
- my @args = ($head, $db, $mid);
- my $smsg = PublicInbox::SearchMsg->get(@args);
- my $mref = $ibx->msg_by_smsg($smsg) or next;
- $first = Email::Simple->new($mref);
- }
- if ($head != $tail) {
- $more = [ $ctx, $head, $tail, $db, $first ];
- }
- });
+ my ($id, $prev);
+ my $smsg = $srch->next_by_mid($mid, \$id, \$prev) or return;
+ my $mref = $ibx->msg_by_smsg($smsg) or return;
+ $first = Email::Simple->new($mref);
+ my $next = $srch->next_by_mid($mid, \$id, \$prev);
+ # $more is for ->getline
+ $more = [ $ctx, $id, $prev, $next, $first ] if $next;
} else {
my $mref = $ibx->msg_by_mid($mid) or return;
$first = Email::Simple->new($mref);
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 0bd6008..309e044 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -147,4 +147,33 @@ SELECT * from OVER where num = ? LIMIT 1
undef;
}
+sub next_by_mid {
+ my ($self, $mid, $id, $prev) = @_;
+ my $dbh = $self->connect;
+
+ unless (defined $$id) {
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
+ SELECT id FROM msgid WHERE mid = ? LIMIT 1
+
+ $sth->execute($mid);
+ $$id = $sth->fetchrow_array;
+ defined $$id or return;
+ }
+ my $sth = $dbh->prepare_cached(<<"", undef, 1);
+SELECT num FROM id2num WHERE id = ? AND num > ?
+ORDER BY num ASC LIMIT 1
+
+ $$prev ||= 0;
+ $sth->execute($$id, $$prev);
+ my $num = $sth->fetchrow_array or return;
+ $$prev = $num;
+
+ $sth = $dbh->prepare_cached(<<"", undef, 1);
+SELECT num,ts,ds,ddd FROM over WHERE num = ? LIMIT 1
+
+ $sth->execute($num);
+ my $smsg = $sth->fetchrow_hashref or return;
+ load_from_row($smsg);
+}
+
1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 34ebd1a..7175ddc 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -304,58 +304,14 @@ sub query_ts {
$self->{over_ro}->query_ts($ts, $prev);
}
-sub first_smsg_by_mid {
- my ($self, $mid) = @_;
- my $smsg;
- retry_reopen($self, sub {
- each_smsg_by_mid($self, $mid, sub { $smsg = $_[0]; undef });
- });
- $smsg;
-}
-
sub lookup_article {
my ($self, $num) = @_;
- my $term = 'XNUM'.$num;
- my $db = $self->{xdb};
- retry_reopen($self, sub {
- my $head = $db->postlist_begin($term);
- my $tail = $db->postlist_end($term);
- return if $head->equal($tail);
- my $doc_id = $head->get_docid;
- return unless defined $doc_id;
- $head->inc;
- if ($head->nequal($tail)) {
- warn "article #$num is not unique\n";
- }
- # raises on error:
- my $doc = $db->get_document($doc_id);
- my $smsg = PublicInbox::SearchMsg->wrap($doc);
- $smsg->{doc_id} = $doc_id;
- $smsg->load_expand;
- });
+ $self->{over_ro}->get_art($num);
}
-sub each_smsg_by_mid {
- my ($self, $mid, $cb) = @_;
- # XXX retry_reopen isn't necessary for V2Writable, but the PSGI
- # interface will need it...
- my $db = $self->{xdb};
- my $term = 'Q' . $mid;
- my $head = $db->postlist_begin($term);
- my $tail = $db->postlist_end($term);
- if ($head == $tail) {
- $db->reopen;
- $head = $db->postlist_begin($term);
- $tail = $db->postlist_end($term);
- }
- return ($head, $tail, $db) if wantarray;
- for (; $head->nequal($tail); $head->inc) {
- my $doc_id = $head->get_docid;
- my $doc = $db->get_document($doc_id);
- my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
- $smsg->{doc_id} = $doc_id;
- $cb->($smsg) or return;
- }
+sub next_by_mid {
+ my ($self, $mid, $id, $prev) = @_;
+ $self->{over_ro}->next_by_mid($mid, $id, $prev);
}
# normalize subjects so they are suitable as pathnames for URLs
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4256263..3596972 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -76,7 +76,8 @@ sub new {
if ($version == 1) {
$self->{lock_path} = "$mainrepo/ssoma.lock";
my $dir = $self->xdir;
- $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+ $self->{over_ro} = $self->{over} =
+ PublicInbox::OverIdx->new("$dir/over.sqlite3");
} elsif ($version == 2) {
defined $part or die "partition is required for v2\n";
# partition is a number
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index a8117aa..877a459 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -271,22 +271,20 @@ sub remove_internal {
foreach my $mid (@$mids) {
my %gone;
- $srch->reopen->each_smsg_by_mid($mid, sub {
- my ($smsg) = @_;
- $smsg->load_expand;
+ my ($id, $prev);
+ while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) {
my $msg = $ibx->msg_by_smsg($smsg);
if (!defined($msg)) {
warn "broken smsg for $mid\n";
- return 1; # continue
+ next; # continue
}
my $orig = $$msg;
my $cur = PublicInbox::MIME->new($msg);
if (content_id($cur) eq $cid) {
$smsg->{mime} = $cur;
- $gone{$smsg->num} = [ $smsg, \$orig ];
+ $gone{$smsg->{num}} = [ $smsg, \$orig ];
}
- 1; # continue
- });
+ }
my $n = scalar keys %gone;
next unless $n;
if ($n > 1) {
@@ -552,26 +550,23 @@ sub lookup_content {
my $srch = $ibx->search->reopen;
my $cid = content_id($mime);
my $found;
- $srch->each_smsg_by_mid($mid, sub {
- my ($smsg) = @_;
- $smsg->load_expand;
+ my ($id, $prev);
+ while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) {
my $msg = $ibx->msg_by_smsg($smsg);
if (!defined($msg)) {
warn "broken smsg for $mid\n";
- return 1; # continue
+ next;
}
my $cur = PublicInbox::MIME->new($msg);
if (content_id($cur) eq $cid) {
$smsg->{mime} = $cur;
$found = $smsg;
- return 0; # break out of loop
+ last;
}
# XXX DEBUG_DIFF is experimental and may be removed
diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
-
- 1; # continue
- });
+ }
$found;
}
@@ -770,15 +765,14 @@ sub unindex_oid {
my $mime = PublicInbox::MIME->new($msgref);
my $mids = mids($mime->header_obj);
$mime = $msgref = undef;
-
+ my $srch = $self->{-inbox}->search;
foreach my $mid (@$mids) {
my %gone;
- $self->{-inbox}->search->reopen->each_smsg_by_mid($mid, sub {
- my ($smsg) = @_;
- $smsg->load_expand;
+ my ($id, $prev);
+ while (my $smsg = $srch->next_by_mid($mid, \$id, \$prev)) {
$gone{$smsg->num} = 1 if $oid eq $smsg->{blob};
1; # continue
- });
+ }
my $n = scalar keys %gone;
next unless $n;
if ($n > 1) {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index cbed916..94058ed 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -58,20 +58,16 @@ sub msg_page {
my ($ctx) = @_;
my $mid = $ctx->{mid};
my $ibx = $ctx->{-inbox};
- my ($first, $more, $head, $tail, $db);
+ my ($first, $more);
my $smsg;
if (my $srch = $ibx->search) {
- $srch->retry_reopen(sub {
- ($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
- for (; !defined($first) && $head != $tail; $head++) {
- my @args = ($head, $db, $mid);
- $smsg = PublicInbox::SearchMsg->get(@args);
- $first = $ibx->msg_by_smsg($smsg);
- }
- if ($head != $tail) {
- $more = [ $head, $tail, $db ];
- }
- });
+ my ($id, $prev);
+ $smsg = $srch->next_by_mid($mid, \$id, \$prev);
+ $first = $ibx->msg_by_smsg($smsg) if $smsg;
+ if ($first) {
+ my $next = $srch->next_by_mid($mid, \$id, \$prev);
+ $more = [ $id, $prev, $next ] if $next;
+ }
return unless $first;
} else {
$first = $ibx->msg_by_mid($mid) or return;
@@ -82,18 +78,11 @@ sub msg_page {
sub msg_html_more {
my ($ctx, $more, $nr) = @_;
my $str = eval {
- my $smsg;
- my ($head, $tail, $db) = @$more;
+ my ($id, $prev, $smsg) = @$more;
my $mid = $ctx->{mid};
- for (; !defined($smsg) && $head != $tail; $head++) {
- my $m = PublicInbox::SearchMsg->get($head, $db, $mid);
- $smsg = $ctx->{-inbox}->smsg_mime($m);
- }
- if ($head == $tail) { # done
- @$more = ();
- } else {
- $more->[0] = $head;
- }
+ $smsg = $ctx->{-inbox}->smsg_mime($smsg);
+ my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
+ @$more = $next ? ($id, $prev, $next) : ();
if ($smsg) {
my $mime = $smsg->{mime};
my $upfx = '../' . mid_escape($smsg->mid) . '/';
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index bdf23de..faa139f 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -129,6 +129,7 @@ test_psgi(sub { $www->call(@_) }, sub {
is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
# search interface
+ $config->each_inbox(sub { $_[0]->search->reopen });
$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
$in = $res->content;
$status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
diff --git a/t/search.t b/t/search.t
index 2f7b795..fda32d3 100644
--- a/t/search.t
+++ b/t/search.t
@@ -89,10 +89,9 @@ sub filter_mids {
{
$rw_commit->();
$ro->reopen;
- my $found = $ro->first_smsg_by_mid('root@s');
- ok($found, "message found");
- is($root_id, $found->{doc_id}, 'doc_id set correctly');
- is($found->mid, 'root@s', 'mid set correctly');
+ my $found = $ro->query('m:root@s');
+ is(scalar(@$found), 1, "message found");
+ is($found->[0]->mid, 'root@s', 'mid set correctly');
my ($res, @res);
my @exp = sort qw(root@s last@s);
@@ -276,10 +275,9 @@ sub filter_mids {
],
body => "LOOP!\n"));
ok($doc_id > 0, "doc_id defined with circular reference");
- my $smsg = $rw->first_smsg_by_mid('circle@a');
+ my $smsg = $rw->query('m:circle@a', {limit=>1})->[0];
is($smsg->references, '', "no references created");
- my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- is($s, $msg->subject, 'long subject not rewritten');
+ is($s, $smsg->subject, 'long subject not rewritten');
}
{
@@ -293,9 +291,7 @@ sub filter_mids {
my $mime = Email::MIME->new($str);
my $doc_id = $rw->add_message($mime);
ok($doc_id > 0, 'message indexed doc_id with UTF-8');
- my $smsg = $rw->first_smsg_by_mid('testmessage@example.com');
- my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
-
+ my $msg = $rw->query('m:testmessage@example.com', {limit => 1})->[0];
is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
}
diff --git a/t/v2writable.t b/t/v2writable.t
index ab85e9a..4a42c01 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -108,14 +108,13 @@ if ('ensure git configs are correct') {
$mime->header_set('References', '<zz-mid@b>');
ok($im->add($mime), 'message with multiple Message-ID');
$im->done;
- my @found;
my $srch = $ibx->search;
- $srch->reopen->each_smsg_by_mid('abcde@1', sub { push @found, @_; 1 });
- is(scalar(@found), 1, 'message found by first MID');
- $srch->reopen->each_smsg_by_mid('abcde@2', sub { push @found, @_; 1 });
- is(scalar(@found), 2, 'message found by second MID');
- is($found[0]->{doc_id}, $found[1]->{doc_id}, 'same document');
- ok($found[1]->{doc_id} > 0, 'doc_id is positive');
+ my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 });
+ is($mset1->size, 1, 'message found by first MID');
+ my $mset2 = $srch->reopen->query('m:abcde@2', { mset => 1 });
+ is($mset2->size, 1, 'message found by second MID');
+ is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid,
+ 'same document');
}
SKIP: {
@@ -224,9 +223,8 @@ EOF
like($smsg->num, qr/\A\d+\z/, 'numeric number in return message');
is($ibx->mm->mid_for($smsg->num), undef, 'no longer in Msgmap by num');
my $srch = $ibx->search->reopen;
- my @found = ();
- $srch->each_smsg_by_mid($smsg->mid, sub { push @found, @_; 1 });
- is(scalar(@found), 0, 'no longer found in Xapian');
+ my $mset = $srch->query('m:'.$smsg->mid, { mset => 1});
+ is($mset->size, 0, 'no longer found in Xapian');
my @log1 = qw(log -1 --pretty=raw --raw -r --no-abbrev --no-renames);
is($srch->{over_ro}->get_art($smsg->num), undef,
'removal propagated to Over DB');
--
EW
next prev parent reply other threads:[~2018-04-06 21:44 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-06 21:44 [PATCH 0/4] a few more odds and ends for v2 Eric Wong (Contractor, The Linux Foundation)
2018-04-06 21:44 ` [PATCH 1/4] altid: fix miscopied field name Eric Wong (Contractor, The Linux Foundation)
2018-04-06 21:44 ` [PATCH 2/4] nntp: set Xref across multiple inboxes Eric Wong (Contractor, The Linux Foundation)
2018-04-06 21:44 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-04-06 21:44 ` [PATCH 4/4] ensure Xapian and SQLite are still optional for v1 tests Eric Wong (Contractor, The Linux Foundation)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180406214439.29591-4-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).