about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-12-10 01:09:46 +0000
committer Eric Wong <e@80x24.org> 2016-12-10 03:23:38 +0000
commit 86c018672f6bf9739a76489c8870c151d338fc15 (patch)
tree 311c12eed2abc2d08e806f3716fd8f0b7a392dbc /lib/PublicInbox/SearchIdx.pm
parent f99f9048cdac42509fbbc1f97e2af32fa3bffca6 (diff)
download public-inbox-86c018672f6bf9739a76489c8870c151d338fc15.tar.gz
Some email clients set the References header backwards, so
trust the In-Reply-To header if (and only if) it exists and
is parseable as the direct parent of the current message.

For affected repos, this will require reindexing (via
"public-inbox-index --reindex"), but there will be no
version bump for this bugfix.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 22
1 files changed, 18 insertions, 4 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4aac0281..832d1cbf 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -293,10 +293,10 @@ sub link_message {
         my $hdr = $mime->header_obj;
         my $refs = $hdr->header_raw('References');
         my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
-        if (my $irt = $hdr->header_raw('In-Reply-To')) {
-                # last References should be $irt
-                # we will de-dupe later
-                push @refs, mid_clean($irt);
+        my $irt = $hdr->header_raw('In-Reply-To');
+        if (defined $irt) {
+                $irt = mid_clean($irt);
+                $irt = undef if $mid eq $irt;
         }
 
         my $tid;
@@ -305,6 +305,15 @@ sub link_message {
                 my @orig_refs = @refs;
                 @refs = ();
 
+                if (defined $irt) {
+                        # to check MAX_MID_SIZE
+                        push @orig_refs, $irt;
+
+                        # below, we will ensure IRT (if specified)
+                        # is the last References
+                        $uniq{$irt} = 1;
+                }
+
                 # prevent circular references via References: here:
                 foreach my $ref (@orig_refs) {
                         if (length($ref) > MAX_MID_SIZE) {
@@ -315,6 +324,11 @@ sub link_message {
                         push @refs, $ref;
                 }
         }
+
+        # last References should be IRT, but some mail clients do things
+        # out of order, so trust IRT over References iff IRT exists
+        push @refs, $irt if defined $irt;
+
         if (@refs) {
                 $smsg->{references} = '<'.join('> <', @refs).'>';