From 6a19d29cb98823ee3794d295e122ee265ab3016a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 18 Aug 2015 01:11:04 +0000 Subject: search: avoid creating ghosts for circular References Some mail software incorrectly creates circular references and causes us to create ghosts before the actual mail doc is created. --- lib/PublicInbox/Search.pm | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 617c267b..db86301d 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -79,8 +79,8 @@ sub add_message { my $db = $self->{xdb}; my $doc_id; - my $mid = mid_clean($mime->header_obj->header_raw('Message-ID')); - $mid = mid_compressed($mid); + my $mid_orig = mid_clean($mime->header_obj->header_raw('Message-ID')); + my $mid = mid_compressed($mid_orig); my $was_ghost = 0; my $ct_msg = $mime->header('Content-Type') || 'text/plain'; my $enc_msg = PublicInbox::View::enc_for($ct_msg); @@ -176,7 +176,7 @@ sub add_message { }; if ($@) { - warn "failed to index message <$mid>: $@\n"; + warn "failed to index message <$mid_orig>: $@\n"; return undef; } $doc_id; @@ -184,11 +184,11 @@ sub add_message { # returns deleted doc_id on success, undef on missing sub remove_message { - my ($self, $mid) = @_; + my ($self, $mid_orig) = @_; my $db = $self->{xdb}; my $doc_id; - $mid = mid_clean($mid); - $mid = mid_compressed($mid); + $mid_orig = mid_clean($mid_orig); + my $mid = mid_compressed($mid_orig); eval { $doc_id = $self->find_unique_doc_id('mid', $mid); @@ -196,7 +196,7 @@ sub remove_message { }; if ($@) { - warn "failed to remove message <$mid>: $@\n"; + warn "failed to remove message <$mid_orig>: $@\n"; return undef; } $doc_id; @@ -347,16 +347,33 @@ sub link_message_to_parents { if ($irt =~ /<([^>]+)>/) { $irt = $1; } - push @refs, $irt; + + # maybe some crazies will try to make a circular reference: + if ($irt eq $mid) { + $irt = undef; + } else { + push @refs, $irt; + } } my $tid; if (@refs) { - @refs = map { mid_compressed($_) } @refs; - my %uniq; - @refs = grep { !$uniq{$_}++ } @refs; # uniq - - $doc->add_term(xpfx('inreplyto') . $refs[-1]); + my @crefs = map { mid_compressed($_) } @refs; + my %uniq = ($mid => 1); + + # prevent circular references via References: here: + @refs = (); + foreach my $ref (@crefs) { + next if $uniq{$ref}; + $uniq{$ref} = 1; + push @refs, $ref; + } + $irt = undef if (defined $irt && !$uniq{$irt}); + } + if (@refs) { + if (defined $irt) { + $doc->add_term(xpfx('inreplyto') . $irt); + } my $ref_pfx = xpfx('references'); -- cgit v1.2.3-24-ge0c7