about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- lib/PublicInbox/MID.pm 11
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 15
-rw-r--r-- lib/PublicInbox/SearchIdxSkeleton.pm 3
3 files changed, 16 insertions, 13 deletions
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 96085399..422902f5 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -10,7 +10,10 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
         mids references/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
-use constant MID_MAX => 40; # SHA-1 hex length
+use constant {
+        MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
+        MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
+};
 
 sub mid_clean {
         my ($mid) = @_;
@@ -61,6 +64,12 @@ sub mids ($) {
                         push(@mids, $v);
                 }
         }
+        foreach my $i (0..$#mids) {
+                next if length($mids[$i]) <= MAX_MID_SIZE;
+                warn "Message-ID: <$mids[$i]> too long, truncating\n";
+                $mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE);
+        }
+
         uniq_mids(\@mids);
 }
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 3ef444d6..a70e1ebf 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,6 @@ use POSIX qw(strftime);
 require PublicInbox::Git;
 
 use constant {
-        MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
         PERM_UMASK => 0,
         OLD_PERM_GROUP => 1,
         OLD_PERM_EVERYBODY => 2,
@@ -311,12 +310,6 @@ sub add_message {
         eval {
                 my $smsg = PublicInbox::SearchMsg->new($mime);
                 my $doc = $smsg->{doc};
-                foreach my $mid (@$mids) {
-                        # FIXME: may be abused to prevent archival
-                        length($mid) > MAX_MID_SIZE and
-                                die 'Message-ID too long';
-                        $doc->add_term('Q' . $mid);
-                }
                 my $subj = $smsg->subject;
                 my $xpath;
                 if ($subj ne '') {
@@ -392,9 +385,11 @@ sub add_message {
                                 }
                         }
                 }
+
                 if ($skel) {
                         push @values, $mids, $xpath, $data;
                         $skel->index_skeleton(\@values);
+                        $doc->add_boolean_term('Q' . $_) foreach @$mids;
                         $doc_id = $self->{xdb}->add_document($doc);
                 } else {
                         $doc_id = link_and_save($self, $doc, $mids, $refs,
@@ -469,9 +464,9 @@ sub parse_references ($) {
         my %mids = map { $_ => 1 } @{mids($hdr)};
         my @keep;
         foreach my $ref (@$refs) {
-                # FIXME: this is an archive-prevention vector like X-No-Archive
-                if (length($ref) > MAX_MID_SIZE) {
+                if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
                         warn "References: <$ref> too long, ignoring\n";
+                        next;
                 }
                 next if $mids{$ref};
                 push @keep, $ref;
@@ -510,6 +505,8 @@ sub link_and_save {
         my $doc_id;
         $doc->add_boolean_term('XNUM' . $num) if defined $num;
         $doc->add_boolean_term('XPATH' . $xpath) if defined $xpath;
+        $doc->add_boolean_term('Q' . $_) foreach @$mids;
+
         my $vivified = 0;
         foreach my $mid (@$mids) {
                 $self->each_smsg_by_mid($mid, sub {
diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm
index 4066b591..40b28c51 100644
--- a/lib/PublicInbox/SearchIdxSkeleton.pm
+++ b/lib/PublicInbox/SearchIdxSkeleton.pm
@@ -98,9 +98,6 @@ sub index_skeleton_real ($$) {
         my $ts = $values->[PublicInbox::Search::TS];
         my $smsg = PublicInbox::SearchMsg->new(undef);
         my $doc = $smsg->{doc};
-        foreach my $mid (@$mids) {
-                $doc->add_term('Q' . $mid);
-        }
         PublicInbox::SearchIdx::add_values($doc, $values);
         $doc->set_data($doc_data);
         $smsg->{ts} = $ts;