about summary refs log tree commit homepage
path: root/lib/PublicInbox/ExtSearch.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2020-11-30 23:37:42 +0000
committer: Eric Wong <e@80x24.org> 2020-12-01 10:20:00 +0000
commit: 82ffb3c183ac20e00effa8a5a7b664eda59672de (patch)
tree: ef35f1a41fe3d9110fdac08730006b8bf16b3c83 /lib/PublicInbox/ExtSearch.pm
parent: cb75a7aeb9fd530b1816b760c8c7f6cfb428c8e3 (diff)
download: public-inbox-82ffb3c183ac20e00effa8a5a7b664eda59672de.tar.gz
nntp: make ->ALL Xref generation more fuzzy
For ->ALL users, this mitigates the regression introduced
by commit 811b8d3cbaa790f59b7b107140b86248da16499b
("nntp: xref: use ->ALL extindex if available"), since
it's common to cross-post messages to some mailing
lists with per-list trailers for unsubscribe information.

We won't bother dealing with Bcc-ed messages since those
are nearly all spam when it comes to public mailing lists.

Fixes: 811b8d3cbaa790f5 ("nntp: xref: use ->ALL extindex if available")
Link: https://public-inbox.org/meta/20201130194201.GA6687@dcvr/
Diffstat (limited to 'lib/PublicInbox/ExtSearch.pm')
-rw-r--r-- lib/PublicInbox/ExtSearch.pm 31
1 files changed, 15 insertions, 16 deletions
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 20ec3224..80455d8d 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -50,8 +50,7 @@ sub git {
         $self->{git} //= PublicInbox::Git->new("$self->{topdir}/ALL.git");
 }
 
-# returns an arrayref of [ $NEWSGROUP_NAME:$ART_NO ] using
-# the `xref3' table
+# returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table
 sub nntp_xref_for { # NNTP only
         my ($self, $xibx, $xsmsg) = @_;
         my $dbh = over($self)->dbh;
@@ -69,7 +68,9 @@ SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1
 SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1
 
         $sth->bind_param(1, pack('H*', $xsmsg->{blob}), SQL_BLOB);
-        $sth->bind_param(2, $xsmsg->{num});
+
+        # NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2
+        $sth->bind_param(2, $xsmsg->{num} || $xsmsg->{-orig_num});
         $sth->bind_param(3, $xibx_id);
         $sth->execute;
         my $docid = $sth->fetchrow_array // do {
@@ -81,9 +82,9 @@ EOF
 
         # LIMIT is number of newsgroups on server:
         $sth = $dbh->prepare_cached(<<'', undef, 1);
-SELECT ibx_id,xnum FROM xref3 WHERE docid = ?
+SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ?
 
-        $sth->execute($docid);
+        $sth->execute($docid, $xibx_id);
         my $rows = $sth->fetchall_arrayref;
 
         my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1);
@@ -91,18 +92,16 @@ SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1
 
         my %xref = map {
                 my ($ibx_id, $xnum) = @$_;
-                if ($ibx_id == $xibx_id) {
-                        ();
-                } else {
-                        $eidx_key_sth->execute($ibx_id);
-                        my $eidx_key = $eidx_key_sth->fetchrow_array;
-
-                        # only include if there's a newsgroup name
-                        $eidx_key && index($eidx_key, '/') >= 0 ?
-                                () : ($eidx_key => $xnum)
-                }
+
+                $eidx_key_sth->execute($ibx_id);
+                my $eidx_key = $eidx_key_sth->fetchrow_array;
+
+                # only include if there's a newsgroup name
+                $eidx_key && index($eidx_key, '/') >= 0 ?
+                        () : ($eidx_key => $xnum)
         } @$rows;
-        [ map { "$_:$xref{$_}" } sort keys %xref ]; # match NNTP LIST order
+        $xref{$xibx->{newsgroup}} = $xsmsg->{num};
+        \%xref;
 }
 
 sub mm { undef }