From c68a1fcd75f103d08f8c2dabf9b3a1db4239f59c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 3 Oct 2015 10:02:33 +0000 Subject: drop Message-IDs longer than 244 bytes Xapian has this limit for terms, and there are likely no legitimate Message-IDs (or single header lines) this long; so there's no need to work around this limit. --- lib/PublicInbox/SearchIdx.pm | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/PublicInbox/SearchIdx.pm') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 8184dc71..0646cfb3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -8,6 +8,7 @@ use base qw(PublicInbox::Search); use PublicInbox::MID qw/mid_clean id_compress/; *xpfx = *PublicInbox::Search::xpfx; +use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian use constant { PERM_UMASK => 0, OLD_PERM_GROUP => 1, @@ -52,6 +53,7 @@ sub add_message { my $ct_msg = $mime->header('Content-Type') || 'text/plain'; eval { + die 'Message-ID too long' if length($mid) > MAX_MID_SIZE; my $smsg = $self->lookup_message($mid); my $doc; @@ -230,6 +232,9 @@ sub link_message_to_parents { # prevent circular references via References: here: foreach my $ref (@orig_refs) { + if (length($ref) > MAX_MID_SIZE) { + warn "References: <$ref> too long, ignoring\n"; + } next if $uniq{$ref}; $uniq{$ref} = 1; push @refs, $ref; -- cgit v1.2.3-24-ge0c7