From ccb66f35d293d3d0e3f9b119f7bd1fa019b93c75 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 11 Feb 2017 23:54:48 +0000 Subject: handle repeated References and In-Reply-To headers It seems possible for git-send-email(1) to generate repeated repeated instances of References and In-Reply-To headers, as evidenced in: https://public-inbox.org/git/20161111124541.8216-17-vascomalmeida@sapo.pt/raw This causes a mismatch between how our search indexer threads and how our HTML view handles threading. In the future, View.pm will use the smsg-parsed {references} field and avoid redoing Email::MIME header parsing. We will still need to figure out a way to deal with messages with repeated Message-IDs, at some point, too. --- lib/PublicInbox/SearchIdx.pm | 30 ++++++------------------------ lib/PublicInbox/SearchThread.pm | 2 +- lib/PublicInbox/View.pm | 19 +++++++++++-------- 3 files changed, 18 insertions(+), 33 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1142ca7a..8a529c66 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -291,17 +291,12 @@ sub link_message { my $mid = $smsg->mid; my $mime = $smsg->{mime}; my $hdr = $mime->header_obj; - my $refs = $hdr->header_raw('References'); - my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : (); - my $irt = $hdr->header_raw('In-Reply-To'); - if (defined $irt) { - if ($irt eq '') { - $irt = undef; - } else { - $irt = mid_clean($irt); - $irt = undef if $mid eq $irt; - } - } + + # last References should be IRT, but some mail clients do things + # out of order, so trust IRT over References iff IRT exists + my @refs = ($hdr->header_raw('References'), + $hdr->header_raw('In-Reply-To')); + @refs = ((join(' ', @refs)) =~ /<([^>]+)>/g); my $tid; if (@refs) { @@ -309,15 +304,6 @@ sub link_message { my @orig_refs = @refs; @refs = (); - if (defined $irt) { - # to check MAX_MID_SIZE - push @orig_refs, $irt; - - # below, we will ensure IRT (if specified) - # is the last References - $uniq{$irt} = 1; - } - # prevent circular references via References: here: foreach my $ref (@orig_refs) { if (length($ref) > MAX_MID_SIZE) { @@ -329,10 +315,6 @@ sub link_message { } } - # last References should be IRT, but some mail clients do things - # out of order, so trust IRT over References iff IRT exists - push @refs, $irt if defined $irt; - if (@refs) { $smsg->{references} = '<'.join('> <', @refs).'>'; diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 2cd066db..2966907a 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -7,7 +7,7 @@ # Mail::Thread is unmaintained and unavailable on some distros. # We also do not want pruning or subject grouping, since we want # to encourage strict threading and hopefully encourage people -# to use proper In-Reply-To. +# to use proper In-Reply-To/References. # # This includes fixes from several open bugs for Mail::Thread # diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 2c37cd42..0b1ec75b 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -92,13 +92,13 @@ EOF sub in_reply_to { my ($hdr) = @_; - my $irt = $hdr->header_raw('In-Reply-To'); - - return mid_clean($irt) if defined $irt && $irt ne ''; - - my $refs = $hdr->header_raw('References'); - if ($refs && $refs =~ /<([^>]+)>\s*\z/s) { - return $1; + my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID'); + my @refs = ($hdr->header_raw('References'), + $hdr->header_raw('In-Reply-To')); + @refs = ((join(' ', @refs)) =~ /<([^>]+)>/g); + while (defined(my $irt = pop @refs)) { + next if $mid{"<$irt>"}; + return $irt; } undef; } @@ -201,7 +201,10 @@ sub _th_index_lite { my $rv = ''; my $mapping = $ctx->{mapping} or return $rv; my $pad = ' '; - my ($attr, $node, $idx, $level) = @{$mapping->{$mid_raw}}; + my $mid_map = $mapping->{$mid_raw}; + defined $mid_map or + return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped'; + my ($attr, $node, $idx, $level) = @$mid_map; my $children = $node->{children}; my $nr_c = scalar @$children; my $nr_s = 0; -- cgit v1.2.3-24-ge0c7