about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-10-24 00:12:40 +0000
committer Eric Wong <e@80x24.org> 2019-10-28 10:49:14 +0000
commit e67dc3e9fc73029332a632d022002d3ce80c1306 (patch)
tree 7790f5cc66b0035666575418b659f37876cfedf4
parent a8ae7e31ac36bcda04bf13a1b834207a89b0014c (diff)
download public-inbox-e67dc3e9fc73029332a632d022002d3ce80c1306.tar.gz
Since we replace extra Message-ID headers with X-Alt-Message-ID
to placate NNTP clients, we should allow searching and indexing
on X-Alt-Message-ID just like we do with Message-ID.
-rw-r--r-- lib/PublicInbox/MID.pm 27
-rw-r--r-- lib/PublicInbox/OverIdx.pm 4
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 4
-rw-r--r-- t/mid.t 7
-rw-r--r-- t/v2writable.t 16
5 files changed, 47 insertions, 11 deletions
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 14089f91..d7a42c38 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
-        mids references/;
+        mids references mids_for_index/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
 require PublicInbox::Address;
@@ -54,11 +54,10 @@ sub mid2path {
 # Only for v1 code paths:
 sub mid_mime ($) { mids($_[0]->header_obj)->[0] }
 
-sub mids ($) {
-        my ($hdr) = @_;
+# only intended for Message-ID and X-Alt-Message-ID
+sub extract_mids {
         my @mids;
-        my @v = $hdr->header_raw('Message-Id');
-        foreach my $v (@v) {
+        for my $v (@_) {
                 my @cur = ($v =~ /<([^>]+)>/sg);
                 if (@cur) {
                         push(@mids, @cur);
@@ -66,7 +65,23 @@ sub mids ($) {
                         push(@mids, $v);
                 }
         }
-        uniq_mids(\@mids);
+        \@mids;
+}
+
+sub mids ($) {
+        my ($hdr) = @_;
+        my @mids = $hdr->header_raw('Message-Id');
+        uniq_mids(extract_mids(@mids));
+}
+
+# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
+# to placate some clients, and we want to ensure NNTP-only clients can
+# import and index without relying on HTTP endpoints
+sub mids_for_index ($) {
+        my ($hdr) = @_;
+        my @mids = $hdr->header_raw('Message-Id');
+        my @alts = $hdr->header_raw('X-Alt-Message-ID');
+        uniq_mids(extract_mids(@mids, @alts));
 }
 
 # last References should be IRT, but some mail clients do things
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 01ca6f11..189bd21d 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -13,7 +13,7 @@ use warnings;
 use base qw(PublicInbox::Over);
 use IO::Handle;
 use DBI qw(:sql_types); # SQL_BLOB
-use PublicInbox::MID qw/id_compress mids references/;
+use PublicInbox::MID qw/id_compress mids_for_index references/;
 use PublicInbox::SearchMsg qw(subject_normalized);
 use Compress::Zlib qw(compress);
 use PublicInbox::Search;
@@ -256,7 +256,7 @@ sub add_overview {
                 lines => $lines,
                 blob => $oid,
         }, 'PublicInbox::SearchMsg';
-        my $mids = mids($mime->header_obj);
+        my $mids = mids_for_index($mime->header_obj);
         my $refs = parse_references($smsg, $mid0, $mids);
         my $subj = $smsg->subject;
         my $xpath;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index aed3875a..b2d71a1f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -12,7 +12,7 @@ use warnings;
 use base qw(PublicInbox::Search PublicInbox::Lock);
 use PublicInbox::MIME;
 use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/;
 use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
@@ -344,7 +344,7 @@ sub add_xapian ($$$$$) {
 sub add_message {
         # mime = Email::MIME object
         my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
-        my $mids = mids($mime->header_obj);
+        my $mids = mids_for_index($mime->header_obj);
         $mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
         unless (defined $num) { # v1
                 $self->_msgmap_init;
diff --git a/t/mid.t b/t/mid.t
index 9ad10a99..98b0c200 100644
--- a/t/mid.t
+++ b/t/mid.t
@@ -1,7 +1,7 @@
 # Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use Test::More;
-use PublicInbox::MID qw(mid_escape mids references);
+use PublicInbox::MID qw(mid_escape mids references mids_for_index);
 
 is(mid_escape('foo!@(bar)'), 'foo!@(bar)');
 is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
@@ -10,6 +10,7 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
 {
         use Email::MIME;
         my $mime = Email::MIME->create;
+        $mime->header_set('X-Alt-Message-ID', '<alt-id-for-nntp>');
         $mime->header_set('Message-Id', '<mid-1@a>');
         is_deeply(['mid-1@a'], mids($mime->header_obj), 'mids in common case');
         $mime->header_set('Message-Id', '<mid-1@a>', '<mid-2@b>');
@@ -40,6 +41,10 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
         $mime->header_set('To', 'u@example.com');
         $mime->header_set('References', '<hello> <world> <n> <u@example.com>');
         is_deeply(references($mime->header_obj), [qw(hello world)]);
+
+        is_deeply([qw(helloworld alt-id-for-nntp)],
+                mids_for_index($mime->header_obj),
+                'X-Alt-Message-ID can be indexed');
 }
 
 done_testing();
diff --git a/t/v2writable.t b/t/v2writable.t
index c2daac2f..2b825768 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -115,6 +115,7 @@ if ('ensure git configs are correct') {
 
 {
         $mime->header_set('Message-Id', '<abcde@1>', '<abcde@2>');
+        $mime->header_set('X-Alt-Message-Id', '<alt-id-for-nntp>');
         $mime->header_set('References', '<zz-mid@b>');
         ok($im->add($mime), 'message with multiple Message-ID');
         $im->done;
@@ -127,6 +128,21 @@ if ('ensure git configs are correct') {
         is($mset2->size, 1, 'message found by second MID');
         is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid,
                 'same document') if ($mset1->size);
+
+        my $alt = $srch->reopen->query('m:alt-id-for-nntp', { mset => 1 });
+        is($alt->size, 1, 'message found by alt MID (NNTP)');
+        is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid,
+                'same document') if ($mset1->size);
+        $mime->header_set('X-Alt-Message-Id');
+
+        my %uniq;
+        for my $mid (qw(abcde@1 abcde@2 alt-id-for-nntp)) {
+                my $msgs = $ibx->over->get_thread($mid);
+                my $key = join(' ', sort(map { $_->{num} } @$msgs));
+                $uniq{$key}++;
+        }
+        is(scalar(keys(%uniq)), 1, 'all alt Message-ID queries give same smsg');
+        is_deeply([values(%uniq)], [3], '3 queries, 3 results');
 }
 
 {