From 35ff6bb106909b1c1232666a9792156dfa398ea8 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 2 Apr 2018 00:04:52 +0000 Subject: replace Xapian skeleton with SQLite overview DB This ought to provide better performance and scalability which is less dependent on inbox size. Xapian does not seem optimized for some queries used by the WWW homepage, Atom feeds, XOVER and NEWNEWS NNTP commands. This can actually make Xapian optional for NNTP usage, and allow more functionality to work without Xapian installed. Indexing performance was extremely bad at first, but DBI::Profile helped me optimize away problematic queries. --- lib/PublicInbox/Over.pm | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 lib/PublicInbox/Over.pm (limited to 'lib/PublicInbox/Over.pm') diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm new file mode 100644 index 00000000..cf7a8849 --- /dev/null +++ b/lib/PublicInbox/Over.pm @@ -0,0 +1,119 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ + +# for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI +# Unlike Msgmap, this is an _UNSTABLE_ database which can be +# tweaked/updated over time and rebuilt. +package PublicInbox::Over; +use strict; +use warnings; +use DBI; +use DBD::SQLite; +use PublicInbox::SearchMsg; +use Compress::Zlib qw(uncompress); + +sub dbh_new { + my ($self) = @_; + my $ro = ref($self) eq 'PublicInbox::Over'; + my $dbh = DBI->connect("dbi:SQLite:dbname=$self->{filename}",'','', { + AutoCommit => 1, + RaiseError => 1, + PrintError => 0, + ReadOnly => $ro, + sqlite_use_immediate_transaction => 1, + }); + $dbh->{sqlite_unicode} = 1; + $dbh; +} + +sub new { + my ($class, $f) = @_; + bless { filename => $f }, $class; +} + +sub disconnect { $_[0]->{dbh} = undef } + +sub connect { $_[0]->{dbh} ||= $_[0]->dbh_new } + +sub load_from_row { + my ($smsg) = @_; + bless $smsg, 'PublicInbox::SearchMsg'; + if (defined(my $data = delete $smsg->{ddd})) { + $data = uncompress($data); + utf8::decode($data); + $smsg->load_from_data($data); + } + $smsg +} + +sub do_get { + my ($self, $sql, $opts, @args) = @_; + my $dbh = $self->connect; + my $lim = (($opts->{limit} || 0) + 0) || 1000; + my $off = (($opts->{offset} || 0) + 0) || 0; + $sql .= "LIMIT $lim OFFSET $off"; + my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args); + load_from_row($_) for @$msgs; + $msgs +} + +sub query_xover { + my ($self, $beg, $end, $off) = @_; + do_get($self, <<'', { offset => $off }, $beg, $end); +SELECT * FROM over WHERE num >= ? AND num <= ? +ORDER BY num ASC + +} + +sub query_ts { + my ($self, $ts, $opts) = @_; + do_get($self, <<'', $opts, $ts); +SELECT * FROM over WHERE num > 0 AND ts >= ? +ORDER BY ts ASC + +} + +sub get_thread { + my ($self, $mid, $opts) = @_; + my $dbh = $self->connect; + my ($tid, $sid) = $dbh->selectrow_array(<<'', undef, $mid); +SELECT tid,sid FROM over +LEFT JOIN id2num ON over.num = id2num.num +LEFT JOIN msgid ON id2num.id = msgid.id +WHERE msgid.mid = ? AND over.num > 0 +LIMIT 1 + + my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > 0'; + my $msgs = do_get($self, <<"", $opts, $tid, $sid); +SELECT * $cond +ORDER BY ts ASC + + my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid); +SELECT COUNT(num) $cond + + { total => $nr, msgs => $msgs }; +} + +sub recent { + my ($self, $opts) = @_; + my $msgs = do_get($self, <<'', $opts); +SELECT * FROM over WHERE num > 0 +ORDER BY ts DESC + + my $nr = $self->{dbh}->selectrow_array(<<''); +SELECT COUNT(num) FROM over WHERE num > 0 + + { total => $nr, msgs => $msgs }; +} + +sub get_art { + my ($self, $num) = @_; + my $dbh = $self->connect; + my $smsg = $dbh->selectrow_hashref(<<'', undef, $num); +SELECT * from OVER where num = ? LIMIT 1 + + return load_from_row($smsg) if $smsg; + undef; +} + +1; -- cgit v1.2.3-24-ge0c7