about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2019-01-29 07:44:28 +0000
committer: Eric Wong <e@80x24.org> 2019-01-29 20:53:46 +0000
commit: b8eb1f39dc9aea6ce84373c50c47c6fc4ac8c503 (patch)
tree: 518b8899596471e9e038a8219236f23d70accfb6 /lib
parent: 54be6d7a117a300e46601537b4392c0174444966 (diff)
download: public-inbox-b8eb1f39dc9aea6ce84373c50c47c6fc4ac8c503.tar.gz
Looking at git@vger history, several emails had broken
References and In-Reply-To headers that contained <y>, <n>,
or email addresses in place of real Message-IDs.

This was causing too many unrelated messages to be linked
together in the same thread.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/MID.pm 25
1 files changed, 19 insertions, 6 deletions
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index cd56f272..7f1ab15e 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -10,6 +10,7 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
         mids references/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
+require PublicInbox::Address;
 use constant {
         MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
         MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
@@ -79,22 +80,34 @@ sub references ($) {
                         push(@mids, ($v =~ /<([^>]+)>/sg));
                 }
         }
-        uniq_mids(\@mids);
+
+        # old versions of git-send-email would prompt users for
+        # In-Reply-To and users' muscle memory would use 'y' or 'n'
+        # as responses:
+        my %addr = ( y => 1, n => 1 );
+
+        foreach my $f (qw(To From Cc)) {
+                my @v = $hdr->header_raw($f);
+                foreach my $v (@v) {
+                        $addr{$_} = 1 for (PublicInbox::Address::emails($v));
+                }
+        }
+        uniq_mids(\@mids, \%addr);
 }
 
-sub uniq_mids ($) {
-        my ($mids) = @_;
+sub uniq_mids ($;$) {
+        my ($mids, $seen) = @_;
         my @ret;
-        my %seen;
+        $seen ||= {};
         foreach my $mid (@$mids) {
                 $mid =~ tr/\n\t\r//d;
                 if (length($mid) > MAX_MID_SIZE) {
                         warn "Message-ID: <$mid> too long, truncating\n";
                         $mid = substr($mid, 0, MAX_MID_SIZE);
                 }
-                next if $seen{$mid};
+                next if $seen->{$mid};
                 push @ret, $mid;
-                $seen{$mid} = 1;
+                $seen->{$mid} = 1;
         }
         \@ret;
 }