about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-02 09:53:11 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-02 09:53:11 +0000
commit af2e250ca2704d06afe0a7ed862dcfca7f740de7 (patch)
tree 7f55c7ae901c6b87714e0a94279b12561510758e
parent 7cacb4ae964408519f5577b895897c447c272da6 (diff)
download public-inbox-af2e250ca2704d06afe0a7ed862dcfca7f740de7.tar.gz
It's shorter and more convenient, here.
-rw-r--r-- lib/PublicInbox/MID.pm 3
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 39
2 files changed, 18 insertions, 24 deletions
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 786c056f..4ccb704d 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -68,6 +68,9 @@ sub uniq_mids {
 }
 
 sub mids { uniq_mids($_[0], 'Message-Id') }
+
+# last References should be IRT, but some mail clients do things
+# out of order, so trust IRT over References iff IRT exists
 sub references { uniq_mids($_[0], 'References', 'In-Reply-To') }
 
 # RFC3986, section 3.3:
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ed52e386..57aed75c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -12,7 +12,7 @@ use warnings;
 use Fcntl qw(:flock :DEFAULT);
 use PublicInbox::MIME;
 use base qw(PublicInbox::Search);
-use PublicInbox::MID qw/mid_clean id_compress mid_mime/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids references/;
 use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
@@ -447,33 +447,24 @@ sub next_thread_id {
 
 sub parse_references ($) {
         my ($smsg) = @_;
-        my $doc = $smsg->{doc};
-        my $mid = $smsg->mid;
         my $mime = $smsg->{mime};
         my $hdr = $mime->header_obj;
-
-        # last References should be IRT, but some mail clients do things
-        # out of order, so trust IRT over References iff IRT exists
-        my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g);
-        push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g));
-
-        if (@refs) {
-                my %uniq = ($mid => 1);
-                my @orig_refs = @refs;
-                @refs = ();
-
-                # prevent circular references via References: here:
-                foreach my $ref (@orig_refs) {
-                        if (length($ref) > MAX_MID_SIZE) {
-                                warn "References: <$ref> too long, ignoring\n";
-                        }
-                        next if $uniq{$ref};
-                        $uniq{$ref} = 1;
-                        push @refs, $ref;
+        my $refs = references($hdr);
+        return $refs if scalar(@$refs) == 0;
+
+        # prevent circular references via References here:
+        my %mids = map { $_ => 1 } @{mids($hdr)};
+        my @keep;
+        foreach my $ref (@$refs) {
+                # FIXME: this is an archive-prevention vector like X-No-Archive
+                if (length($ref) > MAX_MID_SIZE) {
+                        warn "References: <$ref> too long, ignoring\n";
                 }
+                next if $mids{$ref};
+                push @keep, $ref;
         }
-        $smsg->{references} = '<'.join('> <', @refs).'>' if @refs;
-        \@refs
+        $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+        \@keep;
 }
 
 sub link_message {