about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchThread.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-10-05 23:47:17 +0000
committerEric Wong <e@80x24.org>2016-10-05 23:52:07 +0000
commit172416d1cd465da4242cc744a3f309d307f1311d (patch)
tree99800009e9a7707536978338e862091bfb6823ff /lib/PublicInbox/SearchThread.pm
parent08913d1b05e32a7415cbe8afc3c229d108817de8 (diff)
downloadpublic-inbox-172416d1cd465da4242cc744a3f309d307f1311d.tar.gz
Introduce our own SearchThread class for threading messages.
This should allow us to specialize and optimize away objects
in future commits.
Diffstat (limited to 'lib/PublicInbox/SearchThread.pm')
-rw-r--r--lib/PublicInbox/SearchThread.pm323
1 files changed, 323 insertions, 0 deletions
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
new file mode 100644
index 00000000..41fe859e
--- /dev/null
+++ b/lib/PublicInbox/SearchThread.pm
@@ -0,0 +1,323 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# This license differs from the rest of public-inbox
+#
+# Our own jwz-style threading class based on Mail::Thread from CPAN.
+# Mail::Thread is unmaintained and available on some distros.
+# We also do not want pruning or subject grouping, since we want
+# to encourage strict threading and hopefully encourage people
+# to use proper In-Reply-To.
+#
+# This includes fixes from several open bugs for Mail::Thread
+#
+# Avoid circular references
+# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
+#
+# And avoid recursion in recurse_down:
+# - https://rt.cpan.org/Ticket/Display.html?id=116727
+# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
+package PublicInbox::SearchThread;
+use strict;
+use warnings;
+use Email::Abstract;
+
+sub new {
+        return bless {
+                messages => $_[1],
+                id_table => {},
+                rootset  => []
+        }, $_[0];
+}
+
+sub _get_hdr {
+        my ($class, $msg, $hdr) = @_;
+        Email::Abstract->get_header($msg, $hdr) || '';
+}
+
+sub _uniq {
+        my %seen;
+        return grep { !$seen{$_}++ } @_;
+}
+
+sub _references {
+        my $class = shift;
+        my $msg = shift;
+        my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
+        my $foo = $class->_get_hdr($msg, "In-Reply-To");
+        chomp $foo;
+        $foo =~ s/.*?<([^>]+)>.*/$1/;
+        push @references, $foo
+          if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
+        return _uniq(@references);
+}
+
+sub _msgid {
+        my ($class, $msg) = @_;
+        my $id = $class->_get_hdr($msg, "Message-ID");
+        die "attempt to thread message with no id" unless $id;
+        chomp $id;
+        $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
+        return $id;
+}
+
+sub rootset { @{$_[0]{rootset}} }
+
+sub thread {
+        my $self = shift;
+        $self->_setup();
+        $self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
+        $self->_finish();
+}
+
+sub _finish {
+        my $self = shift;
+        delete $self->{id_table};
+        delete $self->{seen};
+}
+
+sub _get_cont_for_id {
+        my $self = shift;
+        my $id = shift;
+        $self->{id_table}{$id} ||= $self->_container_class->new($id);
+}
+
+sub _container_class { 'PublicInbox::SearchThread::Container' }
+
+sub _setup {
+        my ($self) = @_;
+
+        _add_message($self, $_) foreach @{$self->{messages}};
+}
+
+sub _add_message ($$) {
+        my ($self, $message) = @_;
+
+        # A. if id_table...
+        my $this_container = $self->_get_cont_for_id($self->_msgid($message));
+        $this_container->message($message);
+
+        # B. For each element in the message's References field:
+        my @refs = $self->_references($message);
+
+        my $prev;
+        for my $ref (@refs) {
+                # Find a Container object for the given Message-ID
+                my $container = $self->_get_cont_for_id($ref);
+
+                # Link the References field's Containers together in the
+                # order implied by the References header
+                # * If they are already linked don't change the existing links
+                # * Do not add a link if adding that link would introduce
+                #   a loop...
+
+                if ($prev &&
+                        !$container->parent &&  # already linked
+                        !$container->has_descendent($prev) # would loop
+                   ) {
+                        $prev->add_child($container);
+                }
+                $prev = $container;
+        }
+
+        # C. Set the parent of this message to be the last element in
+        # References...
+        if ($prev &&
+                !$this_container->has_descendent($prev) # would loop
+           ) {
+                $prev->add_child($this_container)
+        }
+}
+
+sub order {
+        my $self = shift;
+        my $ordersub = shift;
+
+        # make a fake root
+        my $root = $self->_container_class->new( 'fakeroot' );
+        $root->add_child( $_ ) for @{ $self->{rootset} };
+
+        # sort it
+        $root->order_children( $ordersub );
+
+        # and untangle it
+        my @kids = $root->children;
+        $self->{rootset} = \@kids;
+        $root->remove_child($_) for @kids;
+}
+
+package PublicInbox::SearchThread::Container;
+use Carp qw(croak);
+use Scalar::Util qw(weaken);
+
+sub new { my $self = shift; bless { id => shift }, $self; }
+
+sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
+sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
+sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
+sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
+sub messageid { $_[0]->{id} }
+
+sub add_child {
+        my ($self, $child) = @_;
+        croak "Cowardly refusing to become my own parent: $self"
+          if $self == $child;
+
+        if (grep { $_ == $child } $self->children) {
+                # All is potentially correct with the world
+                $child->parent($self);
+                return;
+        }
+
+        $child->parent->remove_child($child) if $child->parent;
+
+        $child->next($self->child);
+        $self->child($child);
+        $child->parent($self);
+}
+
+sub remove_child {
+        my ($self, $child) = @_;
+        return unless $self->child;
+        if ($self->child == $child) {  # First one's easy.
+                $self->child($child->next);
+                $child->next(undef);
+                $child->parent(undef);
+                return;
+        }
+
+        my $x = $self->child;
+        my $prev = $x;
+        while ($x = $x->next) {
+                if ($x == $child) {
+                        $prev->next($x->next); # Unlink x
+                        $x->next(undef);
+                        $x->parent(undef);         # Deparent it
+                        return;
+                }
+                $prev = $x;
+        }
+        # oddly, we can get here
+        $child->next(undef);
+        $child->parent(undef);
+}
+
+sub has_descendent {
+        my $self = shift;
+        my $child = shift;
+        die "Assertion failed: $child" unless eval {$child};
+        my $there = 0;
+        $self->recurse_down(sub { $there = 1 if $_[0] == $child });
+
+        return $there;
+}
+
+sub children {
+        my $self = shift;
+        my @children;
+        my $visitor = $self->child;
+        while ($visitor) {
+                push @children, $visitor;
+                $visitor = $visitor->next
+        }
+        return @children;
+}
+
+sub set_children {
+        my $self = shift;
+        my $walk = $self->child( shift );
+        while (@_) { $walk = $walk->next( shift ) }
+        $walk->next(undef) if $walk;
+}
+
+sub order_children {
+        my $self = shift;
+        my $ordersub = shift;
+
+        return unless $ordersub;
+
+        my $sub = sub {
+                my $cont = shift;
+                my @children = $cont->children;
+                return if @children < 2;
+                $cont->set_children( $ordersub->( @children ) );
+        };
+        $self->iterate_down( undef, $sub );
+        undef $sub;
+}
+
+# non-recursive version of recurse_down to avoid stack depth warnings
+sub recurse_down {
+        my ($self, $callback) = @_;
+        my %seen;
+        my @q = ($self);
+        while (my $cont = shift @q) {
+                $seen{$cont}++;
+                $callback->($cont);
+
+                if (my $next = $cont->next) {
+                        if ($seen{$next}) {
+                                $cont->next(undef);
+                        } else {
+                                push @q, $next;
+                        }
+                }
+                if (my $child = $cont->child) {
+                        if ($seen{$child}) {
+                                $cont->child(undef);
+                        } else {
+                                push @q, $child;
+                        }
+                }
+        }
+}
+
+sub iterate_down {
+        my $self = shift;
+        my ($before, $after) = @_;
+
+        my %seen;
+        my $walk = $self;
+        my $depth = 0;
+        my @visited;
+        while ($walk) {
+                push @visited, [ $walk, $depth ];
+                $before->($walk, $depth) if $before;
+
+                # spot/break loops
+                $seen{$walk}++;
+
+                my $child = $walk->child;
+                if ($child && $seen{$child}) {
+                        $walk->child(undef);
+                        $child = undef;
+                }
+
+                my $next = $walk->next;
+                if ($next && $seen{$next}) {
+                        $walk->next(undef);
+                        $next = undef;
+                }
+
+                # go down, or across
+                if ($child) {
+                        $next = $child;
+                        ++$depth;
+                }
+
+                # no next?  look up
+                if (!$next) {
+                        my $up = $walk;
+                        while ($up && !$next) {
+                                $up = $up->parent;
+                                --$depth;
+                                $next = $up->next if $up;
+                        }
+                }
+                $walk = $next;
+        }
+        return unless $after;
+        while (@visited) { $after->(@{ pop @visited }) }
+}
+
+1;