about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2015-08-18 01:11:04 +0000
committer Eric Wong <e@80x24.org> 2015-08-18 01:11:53 +0000
commit 6a19d29cb98823ee3794d295e122ee265ab3016a (patch)
tree f73aef5f9d98ab9205c7b76735415594058381c4
parent 7310cc79cea5ba64b1cc7c97169d811d8a4653a0 (diff)
download public-inbox-6a19d29cb98823ee3794d295e122ee265ab3016a.tar.gz
Some mail software incorrectly creates circular references
and causes us to create ghosts before the actual mail doc
is created.
-rw-r--r-- lib/PublicInbox/Search.pm 43
-rw-r--r-- t/search.t 19
2 files changed, 49 insertions, 13 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 617c267b..db86301d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -79,8 +79,8 @@ sub add_message {
         my $db = $self->{xdb};
 
         my $doc_id;
-        my $mid = mid_clean($mime->header_obj->header_raw('Message-ID'));
-        $mid = mid_compressed($mid);
+        my $mid_orig = mid_clean($mime->header_obj->header_raw('Message-ID'));
+        my $mid = mid_compressed($mid_orig);
         my $was_ghost = 0;
         my $ct_msg = $mime->header('Content-Type') || 'text/plain';
         my $enc_msg = PublicInbox::View::enc_for($ct_msg);
@@ -176,7 +176,7 @@ sub add_message {
         };
 
         if ($@) {
-                warn "failed to index message <$mid>: $@\n";
+                warn "failed to index message <$mid_orig>: $@\n";
                 return undef;
         }
         $doc_id;
@@ -184,11 +184,11 @@ sub add_message {
 
 # returns deleted doc_id on success, undef on missing
 sub remove_message {
-        my ($self, $mid) = @_;
+        my ($self, $mid_orig) = @_;
         my $db = $self->{xdb};
         my $doc_id;
-        $mid = mid_clean($mid);
-        $mid = mid_compressed($mid);
+        $mid_orig = mid_clean($mid_orig);
+        my $mid = mid_compressed($mid_orig);
 
         eval {
                 $doc_id = $self->find_unique_doc_id('mid', $mid);
@@ -196,7 +196,7 @@ sub remove_message {
         };
 
         if ($@) {
-                warn "failed to remove message <$mid>: $@\n";
+                warn "failed to remove message <$mid_orig>: $@\n";
                 return undef;
         }
         $doc_id;
@@ -347,16 +347,33 @@ sub link_message_to_parents {
                 if ($irt =~ /<([^>]+)>/) {
                         $irt = $1;
                 }
-                push @refs, $irt;
+
+                # maybe some crazies will try to make a circular reference:
+                if ($irt eq $mid) {
+                        $irt = undef;
+                } else {
+                        push @refs, $irt;
+                }
         }
 
         my $tid;
         if (@refs) {
-                @refs = map { mid_compressed($_) } @refs;
-                my %uniq;
-                @refs = grep { !$uniq{$_}++ } @refs; # uniq
-
-                $doc->add_term(xpfx('inreplyto') . $refs[-1]);
+                my @crefs = map { mid_compressed($_) } @refs;
+                my %uniq = ($mid => 1);
+
+                # prevent circular references via References: here:
+                @refs = ();
+                foreach my $ref (@crefs) {
+                        next if $uniq{$ref};
+                        $uniq{$ref} = 1;
+                        push @refs, $ref;
+                }
+                $irt = undef if (defined $irt && !$uniq{$irt});
+        }
+        if (@refs) {
+                if (defined $irt) {
+                        $doc->add_term(xpfx('inreplyto') . $irt);
+                }
 
                 my $ref_pfx = xpfx('references');
 
diff --git a/t/search.t b/t/search.t
index 0ad0886b..55abe9e8 100644
--- a/t/search.t
+++ b/t/search.t
@@ -243,6 +243,25 @@ sub filter_mids {
                 "quoted result returned if nothing else");
 }
 
+# circular references
+{
+        my $doc_id = $rw->add_message(Email::MIME->create(
+                header_str => [
+                        Date => 'Sat, 02 Oct 2010 00:00:01 +0000',
+                        Subject => 'Circle',
+                        'Message-ID' => '<circle@a>',
+                        'References' => '<circle@a>',
+                        'In-Reply-To' => '<circle@a>',
+                        From => 'Circle <circle@example.com>',
+                        To => 'list@example.com',
+                ],
+                body => "LOOP!\n"));
+        ok($doc_id > 0, "doc_id defined with circular reference");
+        my $smsg = $rw->lookup_message('circle@a');
+        $smsg->ensure_metadata;
+        is($smsg->{references}, undef, "no references created");
+}
+
 done_testing();
 
 1;