user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/2] thread: fix parent/child relationships
  @ 2016-10-13  3:59  6% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2016-10-13  3:59 UTC (permalink / raw)
  To: meta

The ordering change in add_child is critical if $self == $parent
as the {children} hash was lost before this change.

has_descendent can be simplified by walking upwards from the child
instead of downwards from the parent.

This fixes threading regressions introduced in
commit 30100c46326e2eac275e0af13116636701d2537e
("thread: use hash + array instead of hand-rolled linked list")
---
 lib/PublicInbox/SearchThread.pm | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index c6bd999..24a56d2 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -104,27 +104,21 @@ sub add_child {
 	  if $self == $child;
 
 	my $cid = $child->{id};
-	$self->{children}->{$cid} = $child;
 
 	# reparenting:
 	if (defined(my $parent = $child->{parent})) {
 		delete $parent->{children}->{$cid};
 	}
 
+	$self->{children}->{$cid} = $child;
 	$child->{parent} = $self;
 }
 
 sub has_descendent {
-	my ($cur, $child) = @_;
-	my %seen;
-	my @q = ($cur->{parent} || $cur);
-
-	while (defined($cur = shift @q)) {
-		return 1 if $cur == $child;
-
-		if (!$seen{$cur}++) {
-			push @q, values %{$cur->{children}};
-		}
+	my ($self, $child) = @_;
+	while ($child) {
+		return 1 if $self == $child;
+		$child = $child->{parent};
 	}
 	0;
 }
-- 
EW


^ permalink raw reply related	[relevance 6%]

* [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list
  2016-10-05 23:57  7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
@ 2016-10-05 23:57  4% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
  To: meta

This starts to show noticeable performance improvements when
attempting to thread over 400 messages; but the improvement
may not be measurable with less.

However, the resulting code is much shorter and (IMHO)
much easier to understand.
---
 MANIFEST                        |   1 +
 lib/PublicInbox/SearchThread.pm | 158 +++++++++-------------------------------
 lib/PublicInbox/View.pm         |  28 ++++---
 t/thread-cycle.t                |  86 ++++++++++++++++++++++
 4 files changed, 138 insertions(+), 135 deletions(-)
 create mode 100644 t/thread-cycle.t

diff --git a/MANIFEST b/MANIFEST
index bcc4121..3a4d7c4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -155,6 +155,7 @@ t/qspawn.t
 t/search.t
 t/spamcheck_spamc.t
 t/spawn.t
+t/thread-cycle.t
 t/utf8.mbox
 t/view.t
 t/watch_maildir.t
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 153eef2..05de9ec 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -32,9 +32,8 @@ sub new {
 sub thread {
 	my $self = shift;
 	_add_message($self, $_) foreach @{$self->{messages}};
-	$self->{rootset} = [
-			grep { !$_->{parent} } values %{$self->{id_table}} ];
-	delete $self->{id_table};
+	my $id_table = delete $self->{id_table};
+	$self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
 }
 
 sub _get_cont_for_id ($$) {
@@ -82,156 +81,67 @@ sub _add_message ($$) {
 
 sub order {
 	my ($self, $ordersub) = @_;
-
-	# make a fake root
-	my $root = _get_cont_for_id($self, 'fakeroot');
-	$root->add_child( $_ ) for @{ $self->{rootset} };
-
-	# sort it
-	$root->order_children( $ordersub );
-
-	# and untangle it
-	my $kids = $root->children;
-	$self->{rootset} = $kids;
-	$root->remove_child($_) for @$kids;
+	my $rootset = $ordersub->($self->{rootset});
+	$self->{rootset} = $rootset;
+	$_->order_children($ordersub) for @$rootset;
 }
 
 package PublicInbox::SearchThread::Msg;
+use strict;
+use warnings;
 use Carp qw(croak);
 use Scalar::Util qw(weaken);
 
-sub new { my $self = shift; bless { id => shift }, $self; }
+sub new {
+	bless {
+		id => $_[1],
+		children => {}, # becomes an array when sorted by ->order(...)
+	}, $_[0];
+}
 
 sub add_child {
 	my ($self, $child) = @_;
 	croak "Cowardly refusing to become my own parent: $self"
 	  if $self == $child;
 
-	if (grep { $_ == $child } @{$self->children}) {
-		# All is potentially correct with the world
-		weaken($child->{parent} = $self);
-		return;
-	}
-
-	my $parent = $child->{parent};
-	remove_child($parent, $child) if $parent;
+	my $cid = $child->{id};
+	$self->{children}->{$cid} = $child;
 
-	$child->{next} = $self->{child};
-	$self->{child} = $child;
-	weaken($child->{parent} = $self);
-}
-
-sub remove_child {
-	my ($self, $child) = @_;
-
-	my $x = $self->{child} or return;
-	if ($x == $child) {  # First one's easy.
-		$self->{child} = $child->{next};
-		$child->{parent} = $child->{next} = undef;
-		return;
+	# reparenting:
+	if (defined(my $parent = $child->{parent})) {
+		delete $parent->{children}->{$cid};
 	}
 
-	my $prev = $x;
-	while ($x = $x->{next}) {
-		if ($x == $child) {
-			$prev->{next} = $x->{next}; # Unlink x
-			$x->{next} = $x->{parent} = undef; # Deparent it
-			return;
-		}
-		$prev = $x;
-	}
-	# oddly, we can get here
-	$child->{next} = $child->{parent} = undef;
+	weaken($child->{parent} = $self);
 }
 
 sub has_descendent {
-	my ($self, $child) = @_;
+	my ($cur, $child) = @_;
 	my %seen;
-	my @q = ($self);
-	while (my $cont = shift @q) {
-		$seen{$cont} = 1;
+	my @q = ($cur->{parent} || $cur);
 
-		return 1 if $cont == $child;
+	while (defined($cur = shift @q)) {
+		return 1 if $cur == $child;
 
-		if (my $next = $cont->{next}) {
-			if ($seen{$next}) {
-				$cont->{next} = undef;
-			} else {
-				push @q, $next;
-			}
-		}
-		if (my $child = $cont->{child}) {
-			if ($seen{$child}) {
-				$cont->{child} = undef;
-			} else {
-				push @q, $child;
-			}
+		if (!$seen{$cur}++) {
+			push @q, values %{$cur->{children}};
 		}
 	}
 	0;
 }
 
-sub children {
-	my $self = shift;
-	my @children;
-	my $visitor = $self->{child};
-	while ($visitor) {
-		push @children, $visitor;
-		$visitor = $visitor->{next};
-	}
-	\@children;
-}
-
-sub set_children {
-	my ($self, $children) = @_;
-	my $walk = $self->{child} = shift @$children;
-	do {
-		$walk = $walk->{next} = shift @$children;
-	} while ($walk);
-}
-
 sub order_children {
-	my ($walk, $ordersub) = @_;
+	my ($cur, $ordersub) = @_;
 
-	my %seen;
-	my @visited;
-	while ($walk) {
-		push @visited, $walk;
+	my %seen = ($cur => 1);
+	my @q = ($cur);
+	while (defined($cur = shift @q)) {
+		my $c = $cur->{children}; # The hashref here...
 
-		# spot/break loops
-		$seen{$walk} = 1;
-
-		my $child = $walk->{child};
-		if ($child && $seen{$child}) {
-			$walk->{child} = $child = undef;
-		}
-
-		my $next = $walk->{next};
-		if ($next && $seen{$next}) {
-			$walk->{next} = $next = undef;
-		}
-
-		# go down, or across
-		$next = $child if $child;
-
-		# no next?  look up
-		if (!$next) {
-			my $up = $walk;
-			while ($up && !$next) {
-				$up = $up->{parent};
-				$next = $up->{next} if $up;
-			}
-		}
-		$walk = $next;
-	}
-	foreach my $cont (@visited) {
-		my $children = $cont->children;
-		next if @$children < 2;
-		$children = $ordersub->($children);
-		$cont = $cont->{child} = shift @$children;
-		do {
-			$cont = $cont->{next} = shift @$children;
-		} while ($cont);
+		$c = [ grep { !$seen{$_}++ } values %$c ]; # spot/break loops
+		$c = $ordersub->($c) if scalar @$c > 1;
+		$cur->{children} = $c; # ...becomes an arrayref
+		push @q, @$c;
 	}
 }
 
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c09b4a2..d0c6d33 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -203,13 +203,15 @@ sub _th_index_lite {
 	my $pad = '  ';
 	# map = [children, attr, node, idx, level]
 	my $map = $mapping->{$mid_raw};
-	my $nr_c = scalar @{$map->[0]};
+	my $children = $map->[0];
+	my $nr_c = scalar @$children;
 	my $nr_s = 0;
 	my $level = $map->[4];
 	my $idx = $map->[3];
+	my $siblings;
 	my $irt_map = $mapping->{$irt} if defined $irt;
 	if (defined $irt_map) {
-		my $siblings = $irt_map->[0];
+		$siblings = $irt_map->[0];
 		$nr_s = scalar(@$siblings) - 1;
 		$rv .= $pad . $irt_map->[1];
 		if ($idx > 0) {
@@ -233,24 +235,26 @@ sub _th_index_lite {
 	$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
 	$rv .= "<b>@ $this";
 	my $node = $map->[2];
-	if (my $child = $node->{child}) {
-		my $cmid = $child->{id};
+	if ($nr_c) {
+		my $cmid = $children->[0]->{id};
 		$rv .= $pad . $mapping->{$cmid}->[1];
 		if ($nr_c > 2) {
 			my $s = ($nr_c - 1). ' more replies';
 			$rv .= pad_link($cmid, $level + 1, $s);
-		} elsif (my $cn = $child->{next}) {
+		} elsif (my $cn = $children->[1]) {
 			$rv .= $pad . $mapping->{$cn->{id}}->[1];
 		}
 	}
-	if (my $next = $node->{next}) {
+
+	my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
+	if ($next) {
 		my $nmid = $next->{id};
 		$rv .= $pad . $mapping->{$nmid}->[1];
 		my $nnext = $nr_s - $idx;
 		if ($nnext > 2) {
 			my $s = ($nnext - 1).' subsequent siblings';
 			$rv .= pad_link($nmid, $level, $s);
-		} elsif (my $nn = $next->{next}) {
+		} elsif (my $nn = $siblings->[$idx + 2]) {
 			$rv .= $pad . $mapping->{$nn->{id}}->[1];
 		}
 	}
@@ -264,7 +268,8 @@ sub walk_thread {
 		my $level = shift @q;
 		my $node = shift @q or next;
 		$cb->($ctx, $level, $node);
-		unshift @q, $level+1, $node->{child}, $level, $node->{next};
+		++$level;
+		unshift @q, map { ($level, $_) } @{$node->{children}};
 	}
 }
 
@@ -296,7 +301,8 @@ sub stream_thread ($$) {
 	while (@q) {
 		$level = shift @q;
 		my $node = shift @q or next;
-		unshift @q, $level+1, $node->{child}, $level, $node->{next};
+		my $cl = $level + 1;
+		unshift @q, map { ($cl, $_) } @{$node->{children}};
 		$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
 	}
 	return missing_thread($ctx) unless $mime;
@@ -309,8 +315,8 @@ sub stream_thread ($$) {
 		while (@q) {
 			$level = shift @q;
 			my $node = shift @q or next;
-			unshift @q, $level+1, $node->{child},
-					$level, $node->{next};
+			my $cl = $level + 1;
+			unshift @q, map { ($cl, $_) } @{$node->{children}};
 			my $mid = $node->{id};
 			if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
 				$mime = Email::MIME->new($mime);
diff --git a/t/thread-cycle.t b/t/thread-cycle.t
new file mode 100644
index 0000000..4d60f7e
--- /dev/null
+++ b/t/thread-cycle.t
@@ -0,0 +1,86 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok('PublicInbox::SearchMsg');
+use_ok('PublicInbox::SearchThread');
+use Email::Simple;
+my $mt = eval {
+	require Mail::Thread;
+	no warnings 'once';
+	$Mail::Thread::nosubject = 1;
+	$Mail::Thread::noprune = 1;
+};
+my @check;
+my @msgs = map {
+	my $msg = $_;
+	$msg->{references} =~ s/\s+/ /sg if $msg->{references};
+	my $simple = Email::Simple->create(header => [
+		'Message-Id' => "<$msg->{mid}>",
+		'References' => $msg->{references},
+	]);
+	push @check, $simple;
+	bless $msg, 'PublicInbox::SearchMsg'
+} (
+
+# data from t/testbox-6 in Mail::Thread 2.55:
+	{ mid => '20021124145312.GA1759@nlin.net' },
+	{ mid => 'slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk',
+	  references => '<20021124145312.GA1759@nlin.net>',
+	},
+	{ mid => '15842.10677.577458.656565@jupiter.akutech-local.de',
+	  references => '<20021124145312.GA1759@nlin.net>
+			<slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>',
+	},
+	{ mid => '20021125171807.GK8236@somanetworks.com',
+	  references => '<20021124145312.GA1759@nlin.net>
+			<slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+			<15842.10677.577458.656565@jupiter.akutech-local.de>',
+	},
+	{ mid => '15843.12163.554914.469248@jupiter.akutech-local.de',
+	  references => '<20021124145312.GA1759@nlin.net>
+			<slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+			<15842.10677.577458.656565@jupiter.akutech-local.de>
+			<E18GPHf-0000zp-00@cloaked.freeserve.co.uk>',
+	},
+	{ mid => 'E18GPHf-0000zp-00@cloaked.freeserve.co.uk',
+	  references => '<20021124145312.GA1759@nlin.net>
+			<slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+			<15842.10677.577458.656565@jupiter.akutech-local.de>'
+	}
+);
+
+my $th = PublicInbox::SearchThread->new(\@msgs);
+$th->thread;
+$th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+my $st = '';
+my @q = map { (0, $_) } @{$th->{rootset}};
+while (@q) {
+	my $level = shift @q;
+	my $node = shift @q or next;
+	$st .= (" "x$level). "$node->{id}\n";
+	my $cl = $level + 1;
+	unshift @q, map { ($cl, $_) } @{$node->{children}}
+}
+
+SKIP: {
+	skip 'Mail::Thread missing', 1 unless $mt;
+	$mt = Mail::Thread->new(@check);
+	$mt->thread;
+	$mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ });
+	my $check = '';
+
+	@q = map { (0, $_) } $mt->rootset;
+	while (@q) {
+		my $level = shift @q;
+		my $node = shift @q or next;
+		$check .= (" "x$level) . $node->messageid . "\n";
+		unshift @q, $level + 1, $node->child, $level, $node->next;
+	}
+	is($check, $st, 'Mail::Thread output matches');
+}
+
+done_testing();
+
+1;
-- 
EW


^ permalink raw reply related	[relevance 4%]

* [PATCH 0/17] remove Mail::Thread dependency
@ 2016-10-05 23:57  7% Eric Wong
  2016-10-05 23:57  4% ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
  To: meta

This greatly reduces the amount of code we need to load while
reducing abstractions which slow us down and hurt memory use
when displaying gigantic threads.

More may be done and we may use SearchMsg directly for threading
in the future and obviate the need for the container
abstraction.

Eric Wong (17):
      view: remove "subject dummy" references
      thread: remove Mail::Thread dependency
      thread: pass array refs instead of entire arrays
      thread: remove accessor usage in internals
      inbox: deal with ghost smsg
      thread: remove Email::Abstract wrapping
      thread: remove rootset accessor method
      thread: simplify
      thread: remove iterate_down
      thread: avoid incrementing undefined value
      thread: order_children no longer cares about depth
      thread: inline and remove recurse_down logic
      thread: fix sorting without topmost
      thread: use hash + array instead of hand-rolled linked list
      view: remove redundant children array in thread views
      t/thread-cycle: test self-referential messages
      thread: remove weaken dependency

 INSTALL                         |   1 -
 MANIFEST                        |   3 +-
 Makefile.PL                     |   1 -
 lib/PublicInbox/Inbox.pm        |   2 +
 lib/PublicInbox/SearchIdx.pm    |   4 +-
 lib/PublicInbox/SearchMsg.pm    |  29 -------
 lib/PublicInbox/SearchThread.pm | 147 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/SearchView.pm   |  15 ++--
 lib/PublicInbox/Thread.pm       |  86 ---------------------
 lib/PublicInbox/View.pm         | 165 ++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm          |   2 +-
 t/plack.t                       |   3 +-
 t/search.t                      |   7 +-
 t/thread-cycle.t                |  97 +++++++++++++++++++++++
 14 files changed, 333 insertions(+), 229 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2016-10-05 23:57  7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57  4% ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
2016-10-13  3:59     [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong
2016-10-13  3:59  6% ` [PATCH 2/2] thread: fix parent/child relationships Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).