From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.5 required=3.0 tests=ALL_TRUSTED,BAYES_00, RP_MATCHES_RCVD shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 95F9B1F81B for ; Tue, 18 Aug 2015 01:21:10 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/5] search: avoid creating ghosts for circular References Date: Tue, 18 Aug 2015 01:21:07 +0000 Message-Id: <1439860870-8086-2-git-send-email-e@80x24.org> In-Reply-To: <1439860870-8086-1-git-send-email-e@80x24.org> References: <1439860870-8086-1-git-send-email-e@80x24.org> List-Id: Some mail software incorrectly creates circular references and causes us to create ghosts before the actual mail doc is created. --- lib/PublicInbox/Search.pm | 43 ++++++++++++++++++++++++++++++------------- t/search.t | 19 +++++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 617c267..db86301 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -79,8 +79,8 @@ sub add_message { my $db = $self->{xdb}; my $doc_id; - my $mid = mid_clean($mime->header_obj->header_raw('Message-ID')); - $mid = mid_compressed($mid); + my $mid_orig = mid_clean($mime->header_obj->header_raw('Message-ID')); + my $mid = mid_compressed($mid_orig); my $was_ghost = 0; my $ct_msg = $mime->header('Content-Type') || 'text/plain'; my $enc_msg = PublicInbox::View::enc_for($ct_msg); @@ -176,7 +176,7 @@ sub add_message { }; if ($@) { - warn "failed to index message <$mid>: $@\n"; + warn "failed to index message <$mid_orig>: $@\n"; return undef; } $doc_id; @@ -184,11 +184,11 @@ sub add_message { # returns deleted doc_id on success, undef on missing sub remove_message { - my ($self, $mid) = @_; 
+ my ($self, $mid_orig) = @_; my $db = $self->{xdb}; my $doc_id; - $mid = mid_clean($mid); - $mid = mid_compressed($mid); + $mid_orig = mid_clean($mid_orig); + my $mid = mid_compressed($mid_orig); eval { $doc_id = $self->find_unique_doc_id('mid', $mid); @@ -196,7 +196,7 @@ sub remove_message { }; if ($@) { - warn "failed to remove message <$mid>: $@\n"; + warn "failed to remove message <$mid_orig>: $@\n"; return undef; } $doc_id; @@ -347,16 +347,33 @@ sub link_message_to_parents { if ($irt =~ /<([^>]+)>/) { $irt = $1; } - push @refs, $irt; + + # maybe some crazies will try to make a circular reference: + if ($irt eq $mid) { + $irt = undef; + } else { + push @refs, $irt; + } } my $tid; if (@refs) { - @refs = map { mid_compressed($_) } @refs; - my %uniq; - @refs = grep { !$uniq{$_}++ } @refs; # uniq - - $doc->add_term(xpfx('inreplyto') . $refs[-1]); + my @crefs = map { mid_compressed($_) } @refs; + my %uniq = ($mid => 1); + + # prevent circular references via References: here: + @refs = (); + foreach my $ref (@crefs) { + next if $uniq{$ref}; + $uniq{$ref} = 1; + push @refs, $ref; + } + $irt = undef if (defined $irt && !$uniq{$irt}); + } + if (@refs) { + if (defined $irt) { + $doc->add_term(xpfx('inreplyto') . $irt); + } my $ref_pfx = xpfx('references'); diff --git a/t/search.t b/t/search.t index 0ad0886..55abe9e 100644 --- a/t/search.t +++ b/t/search.t @@ -243,6 +243,25 @@ sub filter_mids { "quoted result returned if nothing else"); } +# circular references +{ + my $doc_id = $rw->add_message(Email::MIME->create( + header_str => [ + Date => 'Sat, 02 Oct 2010 00:00:01 +0000', + Subject => 'Circle', + 'Message-ID' => '<circle@a>', + 'References' => '<circle@a>', + 'In-Reply-To' => '<circle@a>', + From => 'Circle ', + To => 'list@example.com', + ], + body => "LOOP!\n")); + ok($doc_id > 0, "doc_id defined with circular reference"); + my $smsg = $rw->lookup_message('circle@a'); + $smsg->ensure_metadata; + is($smsg->{references}, undef, "no references created"); +} + done_testing(); 1; -- EW