about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-10-05 23:47:29 +0000
committerEric Wong <e@80x24.org>2016-10-05 23:53:35 +0000
commit30100c46326e2eac275e0af13116636701d2537e (patch)
treee8d6c86f73bdbc779ee2bdd5b1f4377bfb615148
parent3c9dd6619f825f0515e7e4afa1bd55c99c1a68d3 (diff)
downloadpublic-inbox-30100c46326e2eac275e0af13116636701d2537e.tar.gz
This starts to show noticeable performance improvements when
attempting to thread over 400 messages; but the improvement
may not be measurable with less.

However, the resulting code is much shorter and (IMHO)
much easier to understand.
-rw-r--r--MANIFEST1
-rw-r--r--lib/PublicInbox/SearchThread.pm158
-rw-r--r--lib/PublicInbox/View.pm28
-rw-r--r--t/thread-cycle.t86
4 files changed, 138 insertions, 135 deletions
diff --git a/MANIFEST b/MANIFEST
index bcc41216..3a4d7c4d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -155,6 +155,7 @@ t/qspawn.t
 t/search.t
 t/spamcheck_spamc.t
 t/spawn.t
+t/thread-cycle.t
 t/utf8.mbox
 t/view.t
 t/watch_maildir.t
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 153eef2b..05de9ec5 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -32,9 +32,8 @@ sub new {
 sub thread {
         my $self = shift;
         _add_message($self, $_) foreach @{$self->{messages}};
-        $self->{rootset} = [
-                        grep { !$_->{parent} } values %{$self->{id_table}} ];
-        delete $self->{id_table};
+        my $id_table = delete $self->{id_table};
+        $self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
 }
 
 sub _get_cont_for_id ($$) {
@@ -82,156 +81,67 @@ sub _add_message ($$) {
 
 sub order {
         my ($self, $ordersub) = @_;
-
-        # make a fake root
-        my $root = _get_cont_for_id($self, 'fakeroot');
-        $root->add_child( $_ ) for @{ $self->{rootset} };
-
-        # sort it
-        $root->order_children( $ordersub );
-
-        # and untangle it
-        my $kids = $root->children;
-        $self->{rootset} = $kids;
-        $root->remove_child($_) for @$kids;
+        my $rootset = $ordersub->($self->{rootset});
+        $self->{rootset} = $rootset;
+        $_->order_children($ordersub) for @$rootset;
 }
 
 package PublicInbox::SearchThread::Msg;
+use strict;
+use warnings;
 use Carp qw(croak);
 use Scalar::Util qw(weaken);
 
-sub new { my $self = shift; bless { id => shift }, $self; }
+sub new {
+        bless {
+                id => $_[1],
+                children => {}, # becomes an array when sorted by ->order(...)
+        }, $_[0];
+}
 
 sub add_child {
         my ($self, $child) = @_;
         croak "Cowardly refusing to become my own parent: $self"
           if $self == $child;
 
-        if (grep { $_ == $child } @{$self->children}) {
-                # All is potentially correct with the world
-                weaken($child->{parent} = $self);
-                return;
-        }
-
-        my $parent = $child->{parent};
-        remove_child($parent, $child) if $parent;
+        my $cid = $child->{id};
+        $self->{children}->{$cid} = $child;
 
-        $child->{next} = $self->{child};
-        $self->{child} = $child;
-        weaken($child->{parent} = $self);
-}
-
-sub remove_child {
-        my ($self, $child) = @_;
-
-        my $x = $self->{child} or return;
-        if ($x == $child) {  # First one's easy.
-                $self->{child} = $child->{next};
-                $child->{parent} = $child->{next} = undef;
-                return;
+        # reparenting:
+        if (defined(my $parent = $child->{parent})) {
+                delete $parent->{children}->{$cid};
         }
 
-        my $prev = $x;
-        while ($x = $x->{next}) {
-                if ($x == $child) {
-                        $prev->{next} = $x->{next}; # Unlink x
-                        $x->{next} = $x->{parent} = undef; # Deparent it
-                        return;
-                }
-                $prev = $x;
-        }
-        # oddly, we can get here
-        $child->{next} = $child->{parent} = undef;
+        weaken($child->{parent} = $self);
 }
 
 sub has_descendent {
-        my ($self, $child) = @_;
+        my ($cur, $child) = @_;
         my %seen;
-        my @q = ($self);
-        while (my $cont = shift @q) {
-                $seen{$cont} = 1;
+        my @q = ($cur->{parent} || $cur);
 
-                return 1 if $cont == $child;
+        while (defined($cur = shift @q)) {
+                return 1 if $cur == $child;
 
-                if (my $next = $cont->{next}) {
-                        if ($seen{$next}) {
-                                $cont->{next} = undef;
-                        } else {
-                                push @q, $next;
-                        }
-                }
-                if (my $child = $cont->{child}) {
-                        if ($seen{$child}) {
-                                $cont->{child} = undef;
-                        } else {
-                                push @q, $child;
-                        }
+                if (!$seen{$cur}++) {
+                        push @q, values %{$cur->{children}};
                 }
         }
         0;
 }
 
-sub children {
-        my $self = shift;
-        my @children;
-        my $visitor = $self->{child};
-        while ($visitor) {
-                push @children, $visitor;
-                $visitor = $visitor->{next};
-        }
-        \@children;
-}
-
-sub set_children {
-        my ($self, $children) = @_;
-        my $walk = $self->{child} = shift @$children;
-        do {
-                $walk = $walk->{next} = shift @$children;
-        } while ($walk);
-}
-
 sub order_children {
-        my ($walk, $ordersub) = @_;
+        my ($cur, $ordersub) = @_;
 
-        my %seen;
-        my @visited;
-        while ($walk) {
-                push @visited, $walk;
+        my %seen = ($cur => 1);
+        my @q = ($cur);
+        while (defined($cur = shift @q)) {
+                my $c = $cur->{children}; # The hashref here...
 
-                # spot/break loops
-                $seen{$walk} = 1;
-
-                my $child = $walk->{child};
-                if ($child && $seen{$child}) {
-                        $walk->{child} = $child = undef;
-                }
-
-                my $next = $walk->{next};
-                if ($next && $seen{$next}) {
-                        $walk->{next} = $next = undef;
-                }
-
-                # go down, or across
-                $next = $child if $child;
-
-                # no next?  look up
-                if (!$next) {
-                        my $up = $walk;
-                        while ($up && !$next) {
-                                $up = $up->{parent};
-                                $next = $up->{next} if $up;
-                        }
-                }
-                $walk = $next;
-        }
-        foreach my $cont (@visited) {
-                my $children = $cont->children;
-                next if @$children < 2;
-                $children = $ordersub->($children);
-                $cont = $cont->{child} = shift @$children;
-                do {
-                        $cont = $cont->{next} = shift @$children;
-                } while ($cont);
+                $c = [ grep { !$seen{$_}++ } values %$c ]; # spot/break loops
+                $c = $ordersub->($c) if scalar @$c > 1;
+                $cur->{children} = $c; # ...becomes an arrayref
+                push @q, @$c;
         }
 }
 
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c09b4a22..d0c6d339 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -203,13 +203,15 @@ sub _th_index_lite {
         my $pad = '  ';
         # map = [children, attr, node, idx, level]
         my $map = $mapping->{$mid_raw};
-        my $nr_c = scalar @{$map->[0]};
+        my $children = $map->[0];
+        my $nr_c = scalar @$children;
         my $nr_s = 0;
         my $level = $map->[4];
         my $idx = $map->[3];
+        my $siblings;
         my $irt_map = $mapping->{$irt} if defined $irt;
         if (defined $irt_map) {
-                my $siblings = $irt_map->[0];
+                $siblings = $irt_map->[0];
                 $nr_s = scalar(@$siblings) - 1;
                 $rv .= $pad . $irt_map->[1];
                 if ($idx > 0) {
@@ -233,24 +235,26 @@ sub _th_index_lite {
         $this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
         $rv .= "<b>@ $this";
         my $node = $map->[2];
-        if (my $child = $node->{child}) {
-                my $cmid = $child->{id};
+        if ($nr_c) {
+                my $cmid = $children->[0]->{id};
                 $rv .= $pad . $mapping->{$cmid}->[1];
                 if ($nr_c > 2) {
                         my $s = ($nr_c - 1). ' more replies';
                         $rv .= pad_link($cmid, $level + 1, $s);
-                } elsif (my $cn = $child->{next}) {
+                } elsif (my $cn = $children->[1]) {
                         $rv .= $pad . $mapping->{$cn->{id}}->[1];
                 }
         }
-        if (my $next = $node->{next}) {
+
+        my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
+        if ($next) {
                 my $nmid = $next->{id};
                 $rv .= $pad . $mapping->{$nmid}->[1];
                 my $nnext = $nr_s - $idx;
                 if ($nnext > 2) {
                         my $s = ($nnext - 1).' subsequent siblings';
                         $rv .= pad_link($nmid, $level, $s);
-                } elsif (my $nn = $next->{next}) {
+                } elsif (my $nn = $siblings->[$idx + 2]) {
                         $rv .= $pad . $mapping->{$nn->{id}}->[1];
                 }
         }
@@ -264,7 +268,8 @@ sub walk_thread {
                 my $level = shift @q;
                 my $node = shift @q or next;
                 $cb->($ctx, $level, $node);
-                unshift @q, $level+1, $node->{child}, $level, $node->{next};
+                ++$level;
+                unshift @q, map { ($level, $_) } @{$node->{children}};
         }
 }
 
@@ -296,7 +301,8 @@ sub stream_thread ($$) {
         while (@q) {
                 $level = shift @q;
                 my $node = shift @q or next;
-                unshift @q, $level+1, $node->{child}, $level, $node->{next};
+                my $cl = $level + 1;
+                unshift @q, map { ($cl, $_) } @{$node->{children}};
                 $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
         }
         return missing_thread($ctx) unless $mime;
@@ -309,8 +315,8 @@ sub stream_thread ($$) {
                 while (@q) {
                         $level = shift @q;
                         my $node = shift @q or next;
-                        unshift @q, $level+1, $node->{child},
-                                        $level, $node->{next};
+                        my $cl = $level + 1;
+                        unshift @q, map { ($cl, $_) } @{$node->{children}};
                         my $mid = $node->{id};
                         if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
                                 $mime = Email::MIME->new($mime);
diff --git a/t/thread-cycle.t b/t/thread-cycle.t
new file mode 100644
index 00000000..4d60f7e3
--- /dev/null
+++ b/t/thread-cycle.t
@@ -0,0 +1,86 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok('PublicInbox::SearchMsg');
+use_ok('PublicInbox::SearchThread');
+use Email::Simple;
+my $mt = eval {
+        require Mail::Thread;
+        no warnings 'once';
+        $Mail::Thread::nosubject = 1;
+        $Mail::Thread::noprune = 1;
+};
+my @check;
+my @msgs = map {
+        my $msg = $_;
+        $msg->{references} =~ s/\s+/ /sg if $msg->{references};
+        my $simple = Email::Simple->create(header => [
+                'Message-Id' => "<$msg->{mid}>",
+                'References' => $msg->{references},
+        ]);
+        push @check, $simple;
+        bless $msg, 'PublicInbox::SearchMsg'
+} (
+
+# data from t/testbox-6 in Mail::Thread 2.55:
+        { mid => '20021124145312.GA1759@nlin.net' },
+        { mid => 'slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk',
+          references => '<20021124145312.GA1759@nlin.net>',
+        },
+        { mid => '15842.10677.577458.656565@jupiter.akutech-local.de',
+          references => '<20021124145312.GA1759@nlin.net>
+                        <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>',
+        },
+        { mid => '20021125171807.GK8236@somanetworks.com',
+          references => '<20021124145312.GA1759@nlin.net>
+                        <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+                        <15842.10677.577458.656565@jupiter.akutech-local.de>',
+        },
+        { mid => '15843.12163.554914.469248@jupiter.akutech-local.de',
+          references => '<20021124145312.GA1759@nlin.net>
+                        <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+                        <15842.10677.577458.656565@jupiter.akutech-local.de>
+                        <E18GPHf-0000zp-00@cloaked.freeserve.co.uk>',
+        },
+        { mid => 'E18GPHf-0000zp-00@cloaked.freeserve.co.uk',
+          references => '<20021124145312.GA1759@nlin.net>
+                        <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+                        <15842.10677.577458.656565@jupiter.akutech-local.de>'
+        }
+);
+
+my $th = PublicInbox::SearchThread->new(\@msgs);
+$th->thread;
+$th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+my $st = '';
+my @q = map { (0, $_) } @{$th->{rootset}};
+while (@q) {
+        my $level = shift @q;
+        my $node = shift @q or next;
+        $st .= (" "x$level). "$node->{id}\n";
+        my $cl = $level + 1;
+        unshift @q, map { ($cl, $_) } @{$node->{children}}
+}
+
+SKIP: {
+        skip 'Mail::Thread missing', 1 unless $mt;
+        $mt = Mail::Thread->new(@check);
+        $mt->thread;
+        $mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ });
+        my $check = '';
+
+        @q = map { (0, $_) } $mt->rootset;
+        while (@q) {
+                my $level = shift @q;
+                my $node = shift @q or next;
+                $check .= (" "x$level) . $node->messageid . "\n";
+                unshift @q, $level + 1, $node->child, $level, $node->next;
+        }
+        is($check, $st, 'Mail::Thread output matches');
+}
+
+done_testing();
+
+1;