diff options
-rw-r--r-- | lib/PublicInbox/MID.pm | 11 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 15 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdxSkeleton.pm | 3 |
3 files changed, 16 insertions, 13 deletions
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 96085399..422902f5 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -10,7 +10,10 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC mids references/; use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; -use constant MID_MAX => 40; # SHA-1 hex length +use constant { + MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this + MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q') +}; sub mid_clean { my ($mid) = @_; @@ -61,6 +64,12 @@ sub mids ($) { push(@mids, $v); } } + foreach my $i (0..$#mids) { + next if length($mids[$i]) <= MAX_MID_SIZE; + warn "Message-ID: <$mids[$i]> too long, truncating\n"; + $mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE); + } + uniq_mids(\@mids); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 3ef444d6..a70e1ebf 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -19,7 +19,6 @@ use POSIX qw(strftime); require PublicInbox::Git; use constant { - MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q') PERM_UMASK => 0, OLD_PERM_GROUP => 1, OLD_PERM_EVERYBODY => 2, @@ -311,12 +310,6 @@ sub add_message { eval { my $smsg = PublicInbox::SearchMsg->new($mime); my $doc = $smsg->{doc}; - foreach my $mid (@$mids) { - # FIXME: may be abused to prevent archival - length($mid) > MAX_MID_SIZE and - die 'Message-ID too long'; - $doc->add_term('Q' . $mid); - } my $subj = $smsg->subject; my $xpath; if ($subj ne '') { @@ -392,9 +385,11 @@ sub add_message { } } } + if ($skel) { push @values, $mids, $xpath, $data; $skel->index_skeleton(\@values); + $doc->add_boolean_term('Q' . $_) foreach @$mids; $doc_id = $self->{xdb}->add_document($doc); } else { $doc_id = link_and_save($self, $doc, $mids, $refs, @@ -469,9 +464,9 @@ sub parse_references ($) { my %mids = map { $_ => 1 } @{mids($hdr)}; my @keep; foreach my $ref (@$refs) { - # FIXME: this is an archive-prevention vector like X-No-Archive - if (length($ref) > MAX_MID_SIZE) { + if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) { warn "References: <$ref> too long, ignoring\n"; + next; } next if $mids{$ref}; push @keep, $ref; @@ -510,6 +505,8 @@ sub link_and_save { my $doc_id; $doc->add_boolean_term('XNUM' . $num) if defined $num; $doc->add_boolean_term('XPATH' . $xpath) if defined $xpath; + $doc->add_boolean_term('Q' . $_) foreach @$mids; + my $vivified = 0; foreach my $mid (@$mids) { $self->each_smsg_by_mid($mid, sub { diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm index 4066b591..40b28c51 100644 --- a/lib/PublicInbox/SearchIdxSkeleton.pm +++ b/lib/PublicInbox/SearchIdxSkeleton.pm @@ -98,9 +98,6 @@ sub index_skeleton_real ($$) { my $ts = $values->[PublicInbox::Search::TS]; my $smsg = PublicInbox::SearchMsg->new(undef); my $doc = $smsg->{doc}; - foreach my $mid (@$mids) { - $doc->add_term('Q' . $mid); - } PublicInbox::SearchIdx::add_values($doc, $values); $doc->set_data($doc_data); $smsg->{ts} = $ts; |