From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BFE8B1FB09 for ; Thu, 20 Aug 2020 20:25:00 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 15/23] searchview: use over.sqlite3 instead of Xapian docdata Date: Thu, 20 Aug 2020 20:24:49 +0000 Message-Id: <20200820202457.21042-16-e@yhbt.net> In-Reply-To: <20200820202457.21042-1-e@yhbt.net> References: <20200820202457.21042-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is a step towards improving kernel page cache hit rates by relying on over.sqlite3 for document data instead of Xapian. Some micro-optimization to over->get_art was required to maintain performance. --- lib/PublicInbox/Over.pm | 18 +++++------------- lib/PublicInbox/SearchView.pm | 10 +++++++--- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 81b9fca7..80e57e62 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -57,6 +57,7 @@ sub new { sub disconnect { my ($self) = @_; if (my $dbh = delete $self->{dbh}) { + delete $self->{-get_art}; $self->{filename} = $dbh->sqlite_db_filename; } } @@ -201,8 +202,8 @@ SELECT COUNT(num) FROM over WHERE num > 0 sub get_art { my ($self, $num) = @_; - my $dbh = $self->connect; - my $sth = $dbh->prepare_cached(<<'', undef, 1); + # caching $sth ourselves is faster than prepare_cached + my $sth = $self->{-get_art} //= $self->connect->prepare(<<''); SELECT num,ds,ts,ddd FROM over WHERE num = ? LIMIT 1 $sth->execute($num); @@ -230,13 +231,7 @@ ORDER BY num ASC LIMIT 1 $sth->execute($$id, $$prev); my $num = $sth->fetchrow_array or return; $$prev = $num; - - $sth = $dbh->prepare_cached(<<"", undef, 1); -SELECT num,ts,ds,ddd FROM over WHERE num = ? LIMIT 1 - - $sth->execute($num); - my $smsg = $sth->fetchrow_hashref or return; - load_from_row($smsg); + get_art($self, $num); } # IMAP search, this is limited by callers to UID_SLICE size (50K) @@ -278,10 +273,7 @@ sub check_inodes { my $st = pack('dd', $st[0], $st[1]); # don't actually reopen, just let {dbh} be recreated later - if ($st ne ($self->{st} // $st)) { - delete($self->{dbh}); - $self->{filename} = $f; - } + disconnect($self) if $st ne ($self->{st} // $st); } else { warn "W: stat $f: $!\n"; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 28d9ce5d..61534c25 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -13,6 +13,7 @@ use PublicInbox::WwwAtomStream; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::SearchThread; use PublicInbox::SearchQuery; +use PublicInbox::Search qw(mdocid); my %rmap_inc; sub mbox_results { @@ -90,19 +91,22 @@ sub mset_summary { my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); my $ibx = $ctx->{-inbox}; - my $srch = $ibx->search; + my $over = $ibx->over; + my $nshard = $ibx->search->{nshard} // 1; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; foreach my $m ($mset->items) { my $rank = sprintf("%${pad}d", $m->get_rank + 1); my $pct = get_pct($m); - my $smsg = PublicInbox::Smsg::from_mitem($m, $srch); + my $num = mdocid($nshard, $m); + my $smsg = $over->get_art($num, 1); unless ($smsg) { eval { - $m = "$m ".$m->get_docid . " expired\n"; + $m = "$m $num expired\n"; $ctx->{env}->{'psgi.errors'}->print($m); }; next; } + PublicInbox::Smsg::psgi_cull($smsg); my $s = ascii_html($smsg->{subject}); my $f = ascii_html($smsg->{from_name}); if ($obfs_ibx) {