diff options
author | Eric Wong <e@80x24.org> | 2018-08-05 06:04:40 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2018-08-05 06:12:33 +0000 |
commit | 0c22c3e3a99527ebf04a647ca287ea2fc9bdeddd (patch) | |
tree | 381fa5ea5c80452950c774fb278dc9f95cc0dfdc /lib/PublicInbox/Over.pm | |
parent | 861bec7bec5908871e5b0ede244cb1e990a47403 (diff) | |
download | public-inbox-0c22c3e3a99527ebf04a647ca287ea2fc9bdeddd.tar.gz |
The "loose" (Subject:-based) thread matching yields too many hits for some common subjects (e.g. "[GIT] Networking" on LKML) and causes thread skeletons to not show the current messages. Favor strict matches in the query and only add loose matches if there's space. While working on this, I noticed the backwards --reindex walk breaks `tid' on v1 repositories, at least. That bug was hidden by the Subject: match logic and not discovered until now. It will be fixed separately. Reported-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Diffstat (limited to 'lib/PublicInbox/Over.pm')
-rw-r--r-- | lib/PublicInbox/Over.pm | 43 |
1 files changed, 35 insertions, 8 deletions
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index b2f68835..dda1e6d0 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -11,6 +11,7 @@ use DBI; use DBD::SQLite; use PublicInbox::SearchMsg; use Compress::Zlib qw(uncompress); +use constant DEFAULT_LIMIT => 1000; sub dbh_new { my ($self) = @_; @@ -53,7 +54,7 @@ sub load_from_row { sub do_get { my ($self, $sql, $opts, @args) = @_; my $dbh = $self->connect; - my $lim = (($opts->{limit} || 0) + 0) || 1000; + my $lim = (($opts->{limit} || 0) + 0) || DEFAULT_LIMIT; $sql .= "LIMIT $lim"; my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args); load_from_row($_) for @$msgs; @@ -97,21 +98,47 @@ ORDER BY num ASC LIMIT 1 SELECT tid,sid FROM over WHERE num = ? LIMIT 1 defined $tid or return nothing; # $sid may be undef + + my $cond_all = '(tid = ? OR sid = ?) AND num > ?'; my $sort_col = 'ds'; $num = 0; - if ($prev) { + if ($prev) { # mboxrd stream, only $num = $prev->{num} || 0; $sort_col = 'num'; } - my $cond = '(tid = ? OR sid = ?) AND num > ?'; - my $msgs = do_get($self, <<"", {}, $tid, $sid, $num); -SELECT num,ts,ds,ddd FROM over WHERE $cond ORDER BY $sort_col ASC - return $msgs unless wantarray; + my $cols = 'num,ts,ds,ddd'; + unless (wantarray) { + return do_get($self, <<"", {}, $tid, $sid, $num); +SELECT $cols FROM over WHERE $cond_all +ORDER BY $sort_col ASC - my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $num); -SELECT COUNT(num) FROM over WHERE $cond + } + # HTML view always wants an array and never uses $prev, + # but the mbox stream never wants an array and always has $prev + die '$prev not supported with wantarray' if $prev; + my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $num); +SELECT COUNT(num) FROM over WHERE $cond_all + + # giant thread, prioritize strict (tid) matches and throw + # in the loose (sid) matches at the end + my $msgs = do_get($self, <<"", {}, $tid, $num); +SELECT $cols FROM over WHERE tid = ? AND num > ? +ORDER BY $sort_col ASC + + # do we have room for loose matches? get the most recent ones, first: + my $lim = DEFAULT_LIMIT - scalar(@$msgs); + if ($lim > 0) { + my $opts = { limit => $lim }; + my $loose = do_get($self, <<"", $opts, $tid, $sid, $num); +SELECT $cols FROM over WHERE tid != ? AND sid = ? AND num > ? +ORDER BY $sort_col DESC + + # TODO separate strict and loose matches here once --reindex + # is fixed to preserve `tid' properly + push @$msgs, @$loose; + } ($nr, $msgs); } |