* [PATCH] content_hash: skip Sender for cross posted messages
@ 2021-01-30 5:41 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-01-30 5:41 UTC (permalink / raw)
To: meta
This fixes a regression introduced long ago and restores the
behavior originally specified in the comments: Sender is only
hashed when From is absent. It makes a noticeable
improvement with search results using -extindex ("all") and
lei results with multiple inboxes.
Update some style bits at the top of the test case while
we're at it.
Fixes: f0ef0a56a8957d6f ("v2: improve deduplication checks")
---
lib/PublicInbox/ContentHash.pm | 7 +++----
t/content_hash.t | 14 +++++++++++++-
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index 838fdd6f..4dbe7b50 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -68,10 +68,9 @@ sub content_digest ($) {
# Only use Sender: if From is not present
foreach my $h (qw(From Sender)) {
- my @v = $eml->header($h);
- if (@v) {
- digest_addr($dig, $h, $_) foreach @v;
- }
+ my @v = $eml->header($h) or next;
+ digest_addr($dig, $h, $_) foreach @v;
+ last;
}
foreach my $h (qw(Subject Date)) {
my @v = $eml->header($h);
diff --git a/t/content_hash.t b/t/content_hash.t
index 3f02b1b3..060665f6 100644
--- a/t/content_hash.t
+++ b/t/content_hash.t
@@ -1,7 +1,8 @@
+#!perl -w
# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
-use warnings;
+use v5.10.1;
use Test::More;
use PublicInbox::ContentHash qw(content_hash);
use PublicInbox::Eml;
@@ -19,6 +20,17 @@ EOF
my $orig = content_hash($mime);
my $reload = content_hash(PublicInbox::Eml->new($mime->as_string));
is($orig, $reload, 'content_hash matches after serialization');
+{
+ my $s1 = PublicInbox::Eml->new($mime->as_string);
+ $s1->header_set('Sender', 's@example.com');
+ is(content_hash($s1), $orig, "Sender ignored when 'From' present");
+ my $s2 = PublicInbox::Eml->new($s1->as_string);
+ $s1->header_set('Sender', 'sender@example.com');
+ is(content_hash($s2), $orig, "Sender really ignored 'From'");
+ $_->header_set('From') for ($s1, $s2);
+ isnt(content_hash($s1), content_hash($s2),
+ 'sender accounted when From missing');
+}
foreach my $h (qw(From To Cc)) {
my $n = q("Quoted N'Ame" <foo@EXAMPLE.com>);
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2021-01-30 5:41 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-30 5:41 [PATCH] content_hash: skip Sender for cross posted messages Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).