user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 06/17] thread: remove Email::Abstract wrapping
  2016-10-05 23:57  7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
@ 2016-10-05 23:57  3% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
  To: meta

This roughly doubles performance due to the reduction in
object creation and abstraction layers.
---
 lib/PublicInbox/SearchMsg.pm    | 29 ------------
 lib/PublicInbox/SearchThread.pm | 99 ++++++++++++-----------------------------
 lib/PublicInbox/SearchView.pm   |  8 ++--
 lib/PublicInbox/View.pm         | 90 +++++++++++++++++--------------------
 t/search.t                      |  7 +--
 5 files changed, 76 insertions(+), 157 deletions(-)

diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 9d873c4..9dcc1e6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -144,35 +144,6 @@ sub ensure_metadata {
 	}
 }
 
-# for threading only
-sub mini_mime {
-	my ($self) = @_;
-	$self->ensure_metadata;
-	my @h = (
-		'Subject' => $self->subject,
-		'X-PI-From' => $self->from_name,
-		# prevent Email::Simple::Creator from running,
-		# this header is useless for threading as we use X-PI-TS
-		# for sorting and display:
-		'Date' => EPOCH_822,
-		'Message-ID' => "<$self->{mid}>",
-		'X-PI-TS' => $self->ts,
-	);
-	if (my $refs = $self->{references}) {
-		push @h, References => $refs;
-	}
-	my $mime = Email::MIME->create(header => \@h);
-	my $h = $mime->header_obj;
-
-	# set these headers manually since Encode::encode('MIME-Q', ...)
-	# will add spaces to long values when using header_str above.
-
-	# drop useless headers Email::MIME set for us
-	$h->header_set('Date');
-	$h->header_set('MIME-Version');
-	$mime;
-}
-
 sub mid ($;$) {
 	my ($self, $mid) = @_;
 
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 53fec97..d39e9b6 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -20,7 +20,6 @@
 package PublicInbox::SearchThread;
 use strict;
 use warnings;
-use Email::Abstract;
 
 sub new {
 	return bless {
@@ -30,37 +29,6 @@ sub new {
 	}, $_[0];
 }
 
-sub _get_hdr {
-	my ($class, $msg, $hdr) = @_;
-	Email::Abstract->get_header($msg, $hdr) || '';
-}
-
-sub _uniq {
-	my %seen;
-	return grep { !$seen{$_}++ } @_;
-}
-
-sub _references {
-	my $class = shift;
-	my $msg = shift;
-	my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
-	my $foo = $class->_get_hdr($msg, "In-Reply-To");
-	chomp $foo;
-	$foo =~ s/.*?<([^>]+)>.*/$1/;
-	push @references, $foo
-	  if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
-	return _uniq(@references);
-}
-
-sub _msgid {
-	my ($class, $msg) = @_;
-	my $id = $class->_get_hdr($msg, "Message-ID");
-	die "attempt to thread message with no id" unless $id;
-	chomp $id;
-	$id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
-	return $id;
-}
-
 sub rootset { @{$_[0]{rootset}} }
 
 sub thread {
@@ -77,14 +45,11 @@ sub _finish {
 	delete $self->{seen};
 }
 
-sub _get_cont_for_id {
-	my $self = shift;
-	my $id = shift;
-	$self->{id_table}{$id} ||= $self->_container_class->new($id);
+sub _get_cont_for_id ($$) {
+	my ($self, $mid) = @_;
+	$self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
 }
 
-sub _container_class { 'PublicInbox::SearchThread::Container' }
-
 sub _setup {
 	my ($self) = @_;
 
@@ -92,41 +57,40 @@ sub _setup {
 }
 
 sub _add_message ($$) {
-	my ($self, $message) = @_;
+	my ($self, $smsg) = @_;
 
 	# A. if id_table...
-	my $this_container = $self->_get_cont_for_id($self->_msgid($message));
-	$this_container->{message} = $message;
+	my $this = _get_cont_for_id($self, $smsg->{mid});
+	$this->{smsg} = $smsg;
 
 	# B. For each element in the message's References field:
-	my @refs = $self->_references($message);
-
 	my $prev;
-	for my $ref (@refs) {
-		# Find a Container object for the given Message-ID
-		my $container = $self->_get_cont_for_id($ref);
-
-		# Link the References field's Containers together in the
-		# order implied by the References header
-		# * If they are already linked don't change the existing links
-		# * Do not add a link if adding that link would introduce
-		#   a loop...
-
-		if ($prev &&
-			!$container->{parent} &&  # already linked
-			!$container->has_descendent($prev) # would loop
-		   ) {
-			$prev->add_child($container);
+	if (defined(my $refs = $smsg->{references})) {
+		foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+			# Find a Container object for the given Message-ID
+			my $cont = _get_cont_for_id($self, $ref);
+
+			# Link the References field's Containers together in
+			# the order implied by the References header
+			#
+			# * If they are already linked don't change the
+			#   existing links
+			# * Do not add a link if adding that link would
+			#   introduce a loop...
+			if ($prev &&
+				!$cont->{parent} &&  # already linked
+				!$cont->has_descendent($prev) # would loop
+			   ) {
+				$prev->add_child($cont);
+			}
+			$prev = $cont;
 		}
-		$prev = $container;
 	}
 
 	# C. Set the parent of this message to be the last element in
 	# References...
-	if ($prev &&
-		!$this_container->has_descendent($prev) # would loop
-	   ) {
-		$prev->add_child($this_container)
+	if ($prev && !$this->has_descendent($prev)) { # would loop
+		$prev->add_child($this)
 	}
 }
 
@@ -135,7 +99,7 @@ sub order {
 	my $ordersub = shift;
 
 	# make a fake root
-	my $root = $self->_container_class->new( 'fakeroot' );
+	my $root = _get_cont_for_id($self, 'fakeroot');
 	$root->add_child( $_ ) for @{ $self->{rootset} };
 
 	# sort it
@@ -147,17 +111,12 @@ sub order {
 	$root->remove_child($_) for @$kids;
 }
 
-package PublicInbox::SearchThread::Container;
+package PublicInbox::SearchThread::Msg;
 use Carp qw(croak);
 use Scalar::Util qw(weaken);
 
 sub new { my $self = shift; bless { id => shift }, $self; }
 
-sub message { $_[0]->{message} }
-sub child { $_[0]->{child} }
-sub next { $_[0]->{next} }
-sub messageid { $_[0]->{id} }
-
 sub add_child {
 	my ($self, $child) = @_;
 	croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 0d54c3d..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -148,7 +148,6 @@ sub mset_thread {
 		my $i = $_;
 		my $m = PublicInbox::SearchMsg->load_doc($i->get_document);
 		$pct{$m->mid} = $i->get_percent;
-		$m = $m->mini_mime;
 		$m;
 	} ($mset->items);
 
@@ -156,9 +155,9 @@ sub mset_thread {
 	$th->thread;
 	if ($q->{r}) { # order by relevance
 		$th->order(sub {
-			[ sort { (eval { $pct{$b->topmost->messageid} } || 0)
+			[ sort { (eval { $pct{$b->topmost->{id}} } || 0)
 					<=>
-				(eval { $pct{$a->topmost->messageid} } || 0)
+				(eval { $pct{$a->topmost->{id}} } || 0)
 			} @{$_[0]} ];
 		});
 	} else { # order by time (default for threaded view)
@@ -185,8 +184,7 @@ sub mset_thread {
 	sub {
 		return unless $msgs;
 		while ($mime = shift @$msgs) {
-			my $mid = mid_clean(mid_mime($mime));
-			$mime = $inbox->msg_by_mid($mid) and last;
+			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
 			$mime = Email::MIME->new($mime);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e90efda..748e391 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -214,12 +214,12 @@ sub _th_index_lite {
 		$rv .= $pad . $irt_map->[1];
 		if ($idx > 0) {
 			my $prev = $siblings->[$idx - 1];
-			my $pmid = $prev->messageid;
+			my $pmid = $prev->{id};
 			if ($idx > 2) {
 				my $s = ($idx - 1). ' preceding siblings ...';
 				$rv .= pad_link($pmid, $level, $s);
 			} elsif ($idx == 2) {
-				my $ppmid = $siblings->[0]->messageid;
+				my $ppmid = $siblings->[0]->{id};
 				$rv .= $pad . $mapping->{$ppmid}->[1];
 			}
 			$rv .= $pad . $mapping->{$pmid}->[1];
@@ -233,25 +233,25 @@ sub _th_index_lite {
 	$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
 	$rv .= "<b>@ $this";
 	my $node = $map->[2];
-	if (my $child = $node->child) {
-		my $cmid = $child->messageid;
+	if (my $child = $node->{child}) {
+		my $cmid = $child->{id};
 		$rv .= $pad . $mapping->{$cmid}->[1];
 		if ($nr_c > 2) {
 			my $s = ($nr_c - 1). ' more replies';
 			$rv .= pad_link($cmid, $level + 1, $s);
-		} elsif (my $cn = $child->next) {
-			$rv .= $pad . $mapping->{$cn->messageid}->[1];
+		} elsif (my $cn = $child->{next}) {
+			$rv .= $pad . $mapping->{$cn->{id}}->[1];
 		}
 	}
-	if (my $next = $node->next) {
-		my $nmid = $next->messageid;
+	if (my $next = $node->{next}) {
+		my $nmid = $next->{id};
 		$rv .= $pad . $mapping->{$nmid}->[1];
 		my $nnext = $nr_s - $idx;
 		if ($nnext > 2) {
 			my $s = ($nnext - 1).' subsequent siblings';
 			$rv .= pad_link($nmid, $level, $s);
-		} elsif (my $nn = $next->next) {
-			$rv .= $pad . $mapping->{$nn->messageid}->[1];
+		} elsif (my $nn = $next->{next}) {
+			$rv .= $pad . $mapping->{$nn->{id}}->[1];
 		}
 	}
 	$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -264,7 +264,7 @@ sub walk_thread {
 		my $level = shift @q;
 		my $node = shift @q or next;
 		$cb->($ctx, $level, $node);
-		unshift @q, $level+1, $node->child, $level, $node->next;
+		unshift @q, $level+1, $node->{child}, $level, $node->{next};
 	}
 }
 
@@ -272,12 +272,12 @@ sub pre_thread  {
 	my ($ctx, $level, $node) = @_;
 	my $mapping = $ctx->{mapping};
 	my $idx = -1;
-	if (my $parent = $node->parent) {
-		my $m = $mapping->{$parent->messageid}->[0];
+	if (my $parent = $node->{parent}) {
+		my $m = $mapping->{$parent->{id}}->[0];
 		$idx = scalar @$m;
 		push @$m, $node;
 	}
-	$mapping->{$node->messageid} = [ [], '', $node, $idx, $level ];
+	$mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
 	skel_dump($ctx, $level, $node);
 }
 
@@ -296,8 +296,8 @@ sub stream_thread ($$) {
 	while (@q) {
 		$level = shift @q;
 		my $node = shift @q or next;
-		unshift @q, $level+1, $node->child, $level, $node->next;
-		$mime = $inbox->msg_by_mid($node->messageid) and last;
+		unshift @q, $level+1, $node->{child}, $level, $node->{next};
+		$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
 	}
 	return missing_thread($ctx) unless $mime;
 
@@ -309,13 +309,14 @@ sub stream_thread ($$) {
 		while (@q) {
 			$level = shift @q;
 			my $node = shift @q or next;
-			unshift @q, $level+1, $node->child, $level, $node->next;
-			my $mid = $node->messageid;
-			if ($mime = $inbox->msg_by_mid($mid)) {
+			unshift @q, $level+1, $node->{child},
+					$level, $node->{next};
+			my $mid = $node->{id};
+			if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
 				$mime = Email::MIME->new($mime);
 				return thread_index_entry($ctx, $level, $mime);
 			} else {
-				return ghost_index_entry($ctx, $level, $mid);
+				return ghost_index_entry($ctx, $level, $node);
 			}
 		}
 		my $ret = join('', thread_adj_level($ctx, 0));
@@ -355,11 +356,11 @@ sub thread_html {
 	$skel .= '</pre>';
 	return stream_thread($th, $ctx) unless $ctx->{flat};
 
-	# flat display: lazy load the full message from mini_mime:
+	# flat display: lazy load the full message from smsg
 	my $inbox = $ctx->{-inbox};
 	my $mime;
 	while ($mime = shift @$msgs) {
-		$mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last;
+		$mime = $inbox->msg_by_smsg($mime) and last;
 	}
 	return missing_thread($ctx) unless $mime;
 	$mime = Email::MIME->new($mime);
@@ -369,8 +370,7 @@ sub thread_html {
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $msgs;
 		while ($mime = shift @$msgs) {
-			$mid = mid_clean(mid_mime($mime));
-			$mime = $inbox->msg_by_mid($mid) and last;
+			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
 			$mime = Email::MIME->new($mime);
@@ -738,7 +738,7 @@ sub indent_for {
 sub load_results {
 	my ($sres) = @_;
 
-	[ map { $_->mini_mime } @{delete $sres->{msgs}} ];
+	[ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ];
 }
 
 sub msg_timestamp {
@@ -771,13 +771,13 @@ sub _msg_date {
 sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
 sub _skel_header {
-	my ($ctx, $hdr, $level) = @_;
+	my ($ctx, $smsg, $level) = @_;
 
 	my $dst = $ctx->{dst};
 	my $cur = $ctx->{cur};
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
-	my $f = ascii_html($hdr->header('X-PI-From'));
-	my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level);
+	my $mid = $smsg->{mid};
+	my $f = ascii_html($smsg->from_name);
+	my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
 	my $attr = $f;
 	$ctx->{first_level} ||= $level;
 
@@ -802,7 +802,7 @@ sub _skel_header {
 	# Subject is never undef, this mail was loaded from
 	# our Xapian which would've resulted in '' if it were
 	# really missing (and Filter rejects empty subjects)
-	my $s = $hdr->header('Subject');
+	my $s = $smsg->subject;
 	my $h = $ctx->{srch}->subject_path($s);
 	if ($ctx->{seen}->{$h}) {
 		$s = undef;
@@ -829,10 +829,10 @@ sub _skel_header {
 
 sub skel_dump {
 	my ($ctx, $level, $node) = @_;
-	if (my $mime = $node->message) {
-		_skel_header($ctx, $mime->header_obj, $level);
+	if (my $smsg = $node->{smsg}) {
+		_skel_header($ctx, $smsg, $level);
 	} else {
-		my $mid = $node->messageid;
+		my $mid = $node->{id};
 		my $dst = $ctx->{dst};
 		my $mapping = $ctx->{mapping};
 		my $map = $mapping->{$mid} if $mapping;
@@ -857,31 +857,24 @@ sub skel_dump {
 
 sub sort_ts {
 	[ sort {
-		(eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
-		(eval { $b->topmost->message->header('X-PI-TS') } || 0)
+		(eval { $a->topmost->{smsg}->ts } || 0) <=>
+		(eval { $b->topmost->{smsg}->ts } || 0)
 	} @{$_[0]} ];
 }
 
-sub _tryload_ghost ($$) {
-	my ($srch, $mid) = @_;
-	my $smsg = $srch->lookup_mail($mid) or return;
-	$smsg->mini_mime;
-}
-
 # accumulate recent topics if search is supported
 # returns 200 if done, 404 if not
 sub acc_topic {
 	my ($ctx, $level, $node) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = $node->messageid;
-	my $x = $node->message || _tryload_ghost($srch, $mid);
+	my $mid = $node->{id};
+	my $x = $node->{smsg} || $srch->lookup_mail($mid);
 	my ($subj, $ts);
 	my $topic;
 	if ($x) {
-		$x = $x->header_obj;
-		$subj = $x->header('Subject') || '';
+		$subj = $x->subject;
 		$subj = $srch->subject_normalized($subj);
-		$ts = $x->header('X-PI-TS');
+		$ts = $x->ts;
 		if ($level == 0) {
 			$topic = [ $ts, 1, { $subj => $mid }, $subj ];
 			$ctx->{-cur_topic} = $topic;
@@ -1023,9 +1016,10 @@ sub thread_adj_level {
 }
 
 sub ghost_index_entry {
-	my ($ctx, $level, $mid) = @_;
+	my ($ctx, $level, $node) = @_;
 	my ($beg, $end) = thread_adj_level($ctx,  $level);
-	$beg . '<pre>'. ghost_parent($ctx->{-upfx}, $mid) . '</pre>' . $end;
+	$beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+		. '</pre>' . $end;
 }
 
 1;
diff --git a/t/search.t b/t/search.t
index cce3b9e..eed9c9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -293,7 +293,7 @@ sub filter_mids {
 	$smsg->ensure_metadata;
 	is($smsg->references, '', "no references created");
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
-	is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten');
+	is($s, $msg->subject, 'long subject not rewritten');
 }
 
 {
@@ -310,10 +310,7 @@ sub filter_mids {
 	my $smsg = $rw->lookup_message('testmessage@example.com');
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 
-	# mini_mime technically not valid (I think),
-	# but good enough for displaying HTML:
-	is($mime->header('Subject'), $msg->mini_mime->header('Subject'),
-		'UTF-8 subject preserved');
+	is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
 }
 
 {
-- 
EW


^ permalink raw reply related	[relevance 3%]

* [PATCH 0/17] remove Mail::Thread dependency
@ 2016-10-05 23:57  7% Eric Wong
  2016-10-05 23:57  3% ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
  To: meta

This greatly reduces the amount of code we need to load while
reducing abstractions which slow us down and hurt memory use
when displaying gigantic threads.

More may be done and we may use SearchMsg directly for threading
in the future and obviate the need for the container
abstraction.

Eric Wong (17):
      view: remove "subject dummy" references
      thread: remove Mail::Thread dependency
      thread: pass array refs instead of entire arrays
      thread: remove accessor usage in internals
      inbox: deal with ghost smsg
      thread: remove Email::Abstract wrapping
      thread: remove rootset accessor method
      thread: simplify
      thread: remove iterate_down
      thread: avoid incrementing undefined value
      thread: order_children no longer cares about depth
      thread: inline and remove recurse_down logic
      thread: fix sorting without topmost
      thread: use hash + array instead of hand-rolled linked list
      view: remove redundant children array in thread views
      t/thread-cycle: test self-referential messages
      thread: remove weaken dependency

 INSTALL                         |   1 -
 MANIFEST                        |   3 +-
 Makefile.PL                     |   1 -
 lib/PublicInbox/Inbox.pm        |   2 +
 lib/PublicInbox/SearchIdx.pm    |   4 +-
 lib/PublicInbox/SearchMsg.pm    |  29 -------
 lib/PublicInbox/SearchThread.pm | 147 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/SearchView.pm   |  15 ++--
 lib/PublicInbox/Thread.pm       |  86 ---------------------
 lib/PublicInbox/View.pm         | 165 ++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm          |   2 +-
 t/plack.t                       |   3 +-
 t/search.t                      |   7 +-
 t/thread-cycle.t                |  97 +++++++++++++++++++++++
 14 files changed, 333 insertions(+), 229 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2016-10-05 23:57  7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57  3% ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).