about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-01-23 23:05:59 +0000
committer: Eric Wong <e@yhbt.net> 2020-01-24 23:04:58 +0000
commit: 2a56039ff6a6a8d0b04437b5463c172ecab002c5 (patch)
tree: a3e939f79e2346b56fb9065c965d469596f76e81
parent: 36db110eb3befcb28627d0f3d2d618aa3f1bf76c (diff)
download: public-inbox-2a56039ff6a6a8d0b04437b5463c172ecab002c5.tar.gz
OverIdx::parse_references already skips duplicate
References (which we use in SearchThread for rendering).
So there's no reason for our content deduplication logic
to care if a Message-ID in the References header is mentioned
twice.
-rw-r--r--  lib/PublicInbox/ContentId.pm | 3
-rw-r--r--  lib/PublicInbox/OverIdx.pm   | 3
2 files changed, 2 insertions, 4 deletions
diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm
index 0c4a8678..65691593 100644
--- a/lib/PublicInbox/ContentId.pm
+++ b/lib/PublicInbox/ContentId.pm
@@ -64,8 +64,7 @@ sub content_digest ($) {
         # if we got here, we've already got Message-ID reuse
         my %seen = map { $_ => 1 } @{mids($hdr)};
         foreach my $mid (@{references($hdr)}) {
-                next if $seen{$mid};
-                $dig->add("ref\0$mid\0");
+                $dig->add("ref\0$mid\0") unless $seen{$mid}++;
         }
 
         # Only use Sender: if From is not present
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 189bd21d..5f1007aa 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -230,8 +230,7 @@ sub parse_references ($$$) {
                         warn "References: <$ref> too long, ignoring\n";
                         next;
                 }
-                next if $seen{$ref}++;
-                push @keep, $ref;
+                push(@keep, $ref) unless $seen{$ref}++;
         }
         $smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
         \@keep;