about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--INSTALL1
-rw-r--r--MANIFEST2
-rw-r--r--Makefile.PL1
-rw-r--r--lib/PublicInbox/SearchIdx.pm4
-rw-r--r--lib/PublicInbox/SearchThread.pm323
-rw-r--r--lib/PublicInbox/SearchView.pm4
-rw-r--r--lib/PublicInbox/Thread.pm86
-rw-r--r--lib/PublicInbox/View.pm4
-rw-r--r--lib/PublicInbox/WWW.pm2
-rw-r--r--t/plack.t3
10 files changed, 332 insertions, 98 deletions
diff --git a/INSTALL b/INSTALL
index 5851892c..3a2f840c 100644
--- a/INSTALL
+++ b/INSTALL
@@ -37,7 +37,6 @@ Optional components:
 Optional Perl modules:
 
   - Plack[1]                   libplack-perl
-  - Mail::Thread (2.5+)[1]     libmail-thread-perl
   - URI::Escape[1]             liburi-perl
   - Search::Xapian[2][3]       libsearch-xapian-perl
   - IO::Compress::Gzip[3]      perl-modules (or libio-compress-perl)
diff --git a/MANIFEST b/MANIFEST
index c39fa261..bcc41216 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -78,11 +78,11 @@ lib/PublicInbox/SaPlugin/ListMirror.pm
 lib/PublicInbox/Search.pm
 lib/PublicInbox/SearchIdx.pm
 lib/PublicInbox/SearchMsg.pm
+lib/PublicInbox/SearchThread.pm
 lib/PublicInbox/SearchView.pm
 lib/PublicInbox/Spamcheck/Spamc.pm
 lib/PublicInbox/Spawn.pm
 lib/PublicInbox/SpawnPP.pm
-lib/PublicInbox/Thread.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/View.pm
 lib/PublicInbox/WWW.pm
diff --git a/Makefile.PL b/Makefile.PL
index 4a911037..0bac7c95 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -22,7 +22,6 @@ WriteMakefile(
                 'Email::MIME::ContentType' => 0,
                 'Email::Simple' => 0,
                 'Encode::MIME::Header' => 0,
-                'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
                 'Plack' => 0,
                 'URI::Escape' => 0,
                 # We have more test dependencies, but do not force
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 23aef9f3..4aac0281 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -4,8 +4,8 @@
 #
 # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
 # with the web and NNTP interfaces.  This index maintains thread
-# relationships for use by Mail::Thread.  This writes to the search
-# index.
+# relationships for use by PublicInbox::SearchThread.
+# This writes to the search index.
 package PublicInbox::SearchIdx;
 use strict;
 use warnings;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
new file mode 100644
index 00000000..41fe859e
--- /dev/null
+++ b/lib/PublicInbox/SearchThread.pm
@@ -0,0 +1,323 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# This license differs from the rest of public-inbox
+#
+# Our own jwz-style threading class based on Mail::Thread from CPAN.
+# Mail::Thread is unmaintained and available on some distros.
+# We also do not want pruning or subject grouping, since we want
+# to encourage strict threading and hopefully encourage people
+# to use proper In-Reply-To.
+#
+# This includes fixes from several open bugs for Mail::Thread
+#
+# Avoid circular references
+# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
+#
+# And avoid recursion in recurse_down:
+# - https://rt.cpan.org/Ticket/Display.html?id=116727
+# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
+package PublicInbox::SearchThread;
+use strict;
+use warnings;
+use Email::Abstract;
+
+sub new {
+        return bless {
+                messages => $_[1],
+                id_table => {},
+                rootset  => []
+        }, $_[0];
+}
+
+sub _get_hdr {
+        my ($class, $msg, $hdr) = @_;
+        Email::Abstract->get_header($msg, $hdr) || '';
+}
+
+sub _uniq {
+        my %seen;
+        return grep { !$seen{$_}++ } @_;
+}
+
+sub _references {
+        my $class = shift;
+        my $msg = shift;
+        my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
+        my $foo = $class->_get_hdr($msg, "In-Reply-To");
+        chomp $foo;
+        $foo =~ s/.*?<([^>]+)>.*/$1/;
+        push @references, $foo
+          if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
+        return _uniq(@references);
+}
+
+sub _msgid {
+        my ($class, $msg) = @_;
+        my $id = $class->_get_hdr($msg, "Message-ID");
+        die "attempt to thread message with no id" unless $id;
+        chomp $id;
+        $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
+        return $id;
+}
+
+sub rootset { @{$_[0]{rootset}} }
+
+sub thread {
+        my $self = shift;
+        $self->_setup();
+        $self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
+        $self->_finish();
+}
+
+sub _finish {
+        my $self = shift;
+        delete $self->{id_table};
+        delete $self->{seen};
+}
+
+sub _get_cont_for_id {
+        my $self = shift;
+        my $id = shift;
+        $self->{id_table}{$id} ||= $self->_container_class->new($id);
+}
+
+sub _container_class { 'PublicInbox::SearchThread::Container' }
+
+sub _setup {
+        my ($self) = @_;
+
+        _add_message($self, $_) foreach @{$self->{messages}};
+}
+
+sub _add_message ($$) {
+        my ($self, $message) = @_;
+
+        # A. if id_table...
+        my $this_container = $self->_get_cont_for_id($self->_msgid($message));
+        $this_container->message($message);
+
+        # B. For each element in the message's References field:
+        my @refs = $self->_references($message);
+
+        my $prev;
+        for my $ref (@refs) {
+                # Find a Container object for the given Message-ID
+                my $container = $self->_get_cont_for_id($ref);
+
+                # Link the References field's Containers together in the
+                # order implied by the References header
+                # * If they are already linked don't change the existing links
+                # * Do not add a link if adding that link would introduce
+                #   a loop...
+
+                if ($prev &&
+                        !$container->parent &&  # already linked
+                        !$container->has_descendent($prev) # would loop
+                   ) {
+                        $prev->add_child($container);
+                }
+                $prev = $container;
+        }
+
+        # C. Set the parent of this message to be the last element in
+        # References...
+        if ($prev &&
+                !$this_container->has_descendent($prev) # would loop
+           ) {
+                $prev->add_child($this_container)
+        }
+}
+
+sub order {
+        my $self = shift;
+        my $ordersub = shift;
+
+        # make a fake root
+        my $root = $self->_container_class->new( 'fakeroot' );
+        $root->add_child( $_ ) for @{ $self->{rootset} };
+
+        # sort it
+        $root->order_children( $ordersub );
+
+        # and untangle it
+        my @kids = $root->children;
+        $self->{rootset} = \@kids;
+        $root->remove_child($_) for @kids;
+}
+
+package PublicInbox::SearchThread::Container;
+use Carp qw(croak);
+use Scalar::Util qw(weaken);
+
+sub new { my $self = shift; bless { id => shift }, $self; }
+
+sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
+sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
+sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
+sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
+sub messageid { $_[0]->{id} }
+
+sub add_child {
+        my ($self, $child) = @_;
+        croak "Cowardly refusing to become my own parent: $self"
+          if $self == $child;
+
+        if (grep { $_ == $child } $self->children) {
+                # All is potentially correct with the world
+                $child->parent($self);
+                return;
+        }
+
+        $child->parent->remove_child($child) if $child->parent;
+
+        $child->next($self->child);
+        $self->child($child);
+        $child->parent($self);
+}
+
+sub remove_child {
+        my ($self, $child) = @_;
+        return unless $self->child;
+        if ($self->child == $child) {  # First one's easy.
+                $self->child($child->next);
+                $child->next(undef);
+                $child->parent(undef);
+                return;
+        }
+
+        my $x = $self->child;
+        my $prev = $x;
+        while ($x = $x->next) {
+                if ($x == $child) {
+                        $prev->next($x->next); # Unlink x
+                        $x->next(undef);
+                        $x->parent(undef);         # Deparent it
+                        return;
+                }
+                $prev = $x;
+        }
+        # oddly, we can get here
+        $child->next(undef);
+        $child->parent(undef);
+}
+
+sub has_descendent {
+        my $self = shift;
+        my $child = shift;
+        die "Assertion failed: $child" unless eval {$child};
+        my $there = 0;
+        $self->recurse_down(sub { $there = 1 if $_[0] == $child });
+
+        return $there;
+}
+
+sub children {
+        my $self = shift;
+        my @children;
+        my $visitor = $self->child;
+        while ($visitor) {
+                push @children, $visitor;
+                $visitor = $visitor->next
+        }
+        return @children;
+}
+
+sub set_children {
+        my $self = shift;
+        my $walk = $self->child( shift );
+        while (@_) { $walk = $walk->next( shift ) }
+        $walk->next(undef) if $walk;
+}
+
+sub order_children {
+        my $self = shift;
+        my $ordersub = shift;
+
+        return unless $ordersub;
+
+        my $sub = sub {
+                my $cont = shift;
+                my @children = $cont->children;
+                return if @children < 2;
+                $cont->set_children( $ordersub->( @children ) );
+        };
+        $self->iterate_down( undef, $sub );
+        undef $sub;
+}
+
+# non-recursive version of recurse_down to avoid stack depth warnings
+sub recurse_down {
+        my ($self, $callback) = @_;
+        my %seen;
+        my @q = ($self);
+        while (my $cont = shift @q) {
+                $seen{$cont}++;
+                $callback->($cont);
+
+                if (my $next = $cont->next) {
+                        if ($seen{$next}) {
+                                $cont->next(undef);
+                        } else {
+                                push @q, $next;
+                        }
+                }
+                if (my $child = $cont->child) {
+                        if ($seen{$child}) {
+                                $cont->child(undef);
+                        } else {
+                                push @q, $child;
+                        }
+                }
+        }
+}
+
+sub iterate_down {
+        my $self = shift;
+        my ($before, $after) = @_;
+
+        my %seen;
+        my $walk = $self;
+        my $depth = 0;
+        my @visited;
+        while ($walk) {
+                push @visited, [ $walk, $depth ];
+                $before->($walk, $depth) if $before;
+
+                # spot/break loops
+                $seen{$walk}++;
+
+                my $child = $walk->child;
+                if ($child && $seen{$child}) {
+                        $walk->child(undef);
+                        $child = undef;
+                }
+
+                my $next = $walk->next;
+                if ($next && $seen{$next}) {
+                        $walk->next(undef);
+                        $next = undef;
+                }
+
+                # go down, or across
+                if ($child) {
+                        $next = $child;
+                        ++$depth;
+                }
+
+                # no next?  look up
+                if (!$next) {
+                        my $up = $walk;
+                        while ($up && !$next) {
+                                $up = $up->parent;
+                                --$depth;
+                                $next = $up->next if $up;
+                        }
+                }
+                $walk = $next;
+        }
+        return unless $after;
+        while (@visited) { $after->(@{ pop @visited }) }
+}
+
+1;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 4f0811a8..da311093 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -11,7 +11,7 @@ use PublicInbox::View;
 use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
 use Email::MIME;
 require PublicInbox::Git;
-require PublicInbox::Thread;
+require PublicInbox::SearchThread;
 our $LIM = 50;
 
 sub noop {}
@@ -152,7 +152,7 @@ sub mset_thread {
                 $m;
         } ($mset->items);
 
-        my $th = PublicInbox::Thread->new(@m);
+        my $th = PublicInbox::SearchThread->new(\@m);
         $th->thread;
         if ($q->{r}) { # order by relevance
                 $th->order(sub {
diff --git a/lib/PublicInbox/Thread.pm b/lib/PublicInbox/Thread.pm
deleted file mode 100644
index 8af94616..00000000
--- a/lib/PublicInbox/Thread.pm
+++ /dev/null
@@ -1,86 +0,0 @@
-# subclass Mail::Thread and use this to workaround a memory leak
-# Based on the patch in: https://rt.cpan.org/Public/Bug/Display.html?id=22817
-#
-# Additionally, workaround for a bug where $walk->topmost returns undef:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-#
-# And avoid recursion in recurse_down:
-# - https://rt.cpan.org/Ticket/Display.html?id=116727
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
-#
-# License differs from the rest of public-inbox (but is compatible):
-# This library is free software; you can redistribute it and/or modify
-# it under the same terms as Perl itself.
-package PublicInbox::Thread;
-use strict;
-use warnings;
-use base qw(Mail::Thread);
-# WARNING! both these Mail::Thread knobs were found by inspecting
-# the Mail::Thread 2.55 source code, and we have some monkey patches
-# in PublicInbox::Thread to fix memory leaks.  Since Mail::Thread
-# appears unmaintained, I suppose it's safe to depend on these
-# variables for now:
-{
-        no warnings 'once';
-        # we want strict threads to expose (and hopefully discourage)
-        # use of broken email clients
-        $Mail::Thread::nosubject = 1;
-        # Keep ghosts with only a single direct child,
-        # don't hide that there may be missing messages.
-        $Mail::Thread::noprune = 1;
-}
-
-if ($Mail::Thread::VERSION <= 2.55) {
-        eval q(sub _container_class { 'PublicInbox::Thread::Container' });
-}
-
-package PublicInbox::Thread::Container;
-use strict;
-use warnings;
-use base qw(Mail::Thread::Container);
-use Scalar::Util qw(weaken);
-sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
-
-sub topmost {
-        $_[0]->SUPER::topmost || PublicInbox::Thread::CPANRTBug106498->new;
-}
-
-# non-recursive version of recurse_down to avoid stack depth warnings
-sub recurse_down {
-        my ($self, $callback) = @_;
-        my %seen;
-        my @q = ($self);
-        while (my $cont = shift @q) {
-                $seen{$cont}++;
-                $callback->($cont);
-
-                if (my $next = $cont->next) {
-                        if ($seen{$next}) {
-                                $cont->next(undef);
-                        } else {
-                                push @q, $next;
-                        }
-                }
-                if (my $child = $cont->child) {
-                        if ($seen{$child}) {
-                                $cont->child(undef);
-                        } else {
-                                push @q, $child;
-                        }
-                }
-        }
-}
-
-# ref:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-package PublicInbox::Thread::CPANRTBug106498;
-use strict;
-use warnings;
-
-sub new { bless {}, $_[0] }
-
-sub simple_subject {}
-
-1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a3b26814..9f1bf460 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -749,8 +749,8 @@ sub msg_timestamp {
 
 sub thread_results {
         my ($msgs) = @_;
-        require PublicInbox::Thread;
-        my $th = PublicInbox::Thread->new(@$msgs);
+        require PublicInbox::SearchThread;
+        my $th = PublicInbox::SearchThread->new($msgs);
         $th->thread;
         $th->order(*sort_ts);
         $th
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 4d599fc9..11fc92e9 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -112,7 +112,7 @@ sub call {
 sub preload {
         require PublicInbox::Feed;
         require PublicInbox::View;
-        require PublicInbox::Thread;
+        require PublicInbox::SearchThread;
         require Email::MIME;
         require Digest::SHA;
         require POSIX;
diff --git a/t/plack.t b/t/plack.t
index 608afb9e..1d624589 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -11,8 +11,7 @@ my $pi_config = "$tmpdir/config";
 my $maindir = "$tmpdir/main.git";
 my $addr = 'test-public@example.com';
 my $cfgpfx = "publicinbox.test";
-my @mods = qw(HTTP::Request::Common Plack::Test
-        Mail::Thread URI::Escape);
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
 foreach my $mod (@mods) {
         eval "require $mod";
         plan skip_all => "$mod missing for plack.t" if $@;