user/dev discussion of public-inbox itself
 help / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 06/17] thread: remove Email::Abstract wrapping
Date: Wed,  5 Oct 2016 23:57:11 +0000
Message-ID: <20161005235722.14857-7-e@80x24.org> (raw)
In-Reply-To: <20161005235722.14857-1-e@80x24.org>

This roughly doubles performance due to the reduction in
object creation and abstraction layers.
---
 lib/PublicInbox/SearchMsg.pm    | 29 ------------
 lib/PublicInbox/SearchThread.pm | 99 ++++++++++++-----------------------------
 lib/PublicInbox/SearchView.pm   |  8 ++--
 lib/PublicInbox/View.pm         | 90 +++++++++++++++++--------------------
 t/search.t                      |  7 +--
 5 files changed, 76 insertions(+), 157 deletions(-)

diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 9d873c4..9dcc1e6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -144,35 +144,6 @@ sub ensure_metadata {
 	}
 }
 
-# for threading only
-sub mini_mime {
-	my ($self) = @_;
-	$self->ensure_metadata;
-	my @h = (
-		'Subject' => $self->subject,
-		'X-PI-From' => $self->from_name,
-		# prevent Email::Simple::Creator from running,
-		# this header is useless for threading as we use X-PI-TS
-		# for sorting and display:
-		'Date' => EPOCH_822,
-		'Message-ID' => "<$self->{mid}>",
-		'X-PI-TS' => $self->ts,
-	);
-	if (my $refs = $self->{references}) {
-		push @h, References => $refs;
-	}
-	my $mime = Email::MIME->create(header => \@h);
-	my $h = $mime->header_obj;
-
-	# set these headers manually since Encode::encode('MIME-Q', ...)
-	# will add spaces to long values when using header_str above.
-
-	# drop useless headers Email::MIME set for us
-	$h->header_set('Date');
-	$h->header_set('MIME-Version');
-	$mime;
-}
-
 sub mid ($;$) {
 	my ($self, $mid) = @_;
 
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 53fec97..d39e9b6 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -20,7 +20,6 @@
 package PublicInbox::SearchThread;
 use strict;
 use warnings;
-use Email::Abstract;
 
 sub new {
 	return bless {
@@ -30,37 +29,6 @@ sub new {
 	}, $_[0];
 }
 
-sub _get_hdr {
-	my ($class, $msg, $hdr) = @_;
-	Email::Abstract->get_header($msg, $hdr) || '';
-}
-
-sub _uniq {
-	my %seen;
-	return grep { !$seen{$_}++ } @_;
-}
-
-sub _references {
-	my $class = shift;
-	my $msg = shift;
-	my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
-	my $foo = $class->_get_hdr($msg, "In-Reply-To");
-	chomp $foo;
-	$foo =~ s/.*?<([^>]+)>.*/$1/;
-	push @references, $foo
-	  if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
-	return _uniq(@references);
-}
-
-sub _msgid {
-	my ($class, $msg) = @_;
-	my $id = $class->_get_hdr($msg, "Message-ID");
-	die "attempt to thread message with no id" unless $id;
-	chomp $id;
-	$id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
-	return $id;
-}
-
 sub rootset { @{$_[0]{rootset}} }
 
 sub thread {
@@ -77,14 +45,11 @@ sub _finish {
 	delete $self->{seen};
 }
 
-sub _get_cont_for_id {
-	my $self = shift;
-	my $id = shift;
-	$self->{id_table}{$id} ||= $self->_container_class->new($id);
+sub _get_cont_for_id ($$) {
+	my ($self, $mid) = @_;
+	$self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
 }
 
-sub _container_class { 'PublicInbox::SearchThread::Container' }
-
 sub _setup {
 	my ($self) = @_;
 
@@ -92,41 +57,40 @@ sub _setup {
 }
 
 sub _add_message ($$) {
-	my ($self, $message) = @_;
+	my ($self, $smsg) = @_;
 
 	# A. if id_table...
-	my $this_container = $self->_get_cont_for_id($self->_msgid($message));
-	$this_container->{message} = $message;
+	my $this = _get_cont_for_id($self, $smsg->{mid});
+	$this->{smsg} = $smsg;
 
 	# B. For each element in the message's References field:
-	my @refs = $self->_references($message);
-
 	my $prev;
-	for my $ref (@refs) {
-		# Find a Container object for the given Message-ID
-		my $container = $self->_get_cont_for_id($ref);
-
-		# Link the References field's Containers together in the
-		# order implied by the References header
-		# * If they are already linked don't change the existing links
-		# * Do not add a link if adding that link would introduce
-		#   a loop...
-
-		if ($prev &&
-			!$container->{parent} &&  # already linked
-			!$container->has_descendent($prev) # would loop
-		   ) {
-			$prev->add_child($container);
+	if (defined(my $refs = $smsg->{references})) {
+		foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+			# Find a Container object for the given Message-ID
+			my $cont = _get_cont_for_id($self, $ref);
+
+			# Link the References field's Containers together in
+			# the order implied by the References header
+			#
+			# * If they are already linked don't change the
+			#   existing links
+			# * Do not add a link if adding that link would
+			#   introduce a loop...
+			if ($prev &&
+				!$cont->{parent} &&  # already linked
+				!$cont->has_descendent($prev) # would loop
+			   ) {
+				$prev->add_child($cont);
+			}
+			$prev = $cont;
 		}
-		$prev = $container;
 	}
 
 	# C. Set the parent of this message to be the last element in
 	# References...
-	if ($prev &&
-		!$this_container->has_descendent($prev) # would loop
-	   ) {
-		$prev->add_child($this_container)
+	if ($prev && !$this->has_descendent($prev)) { # would loop
+		$prev->add_child($this)
 	}
 }
 
@@ -135,7 +99,7 @@ sub order {
 	my $ordersub = shift;
 
 	# make a fake root
-	my $root = $self->_container_class->new( 'fakeroot' );
+	my $root = _get_cont_for_id($self, 'fakeroot');
 	$root->add_child( $_ ) for @{ $self->{rootset} };
 
 	# sort it
@@ -147,17 +111,12 @@ sub order {
 	$root->remove_child($_) for @$kids;
 }
 
-package PublicInbox::SearchThread::Container;
+package PublicInbox::SearchThread::Msg;
 use Carp qw(croak);
 use Scalar::Util qw(weaken);
 
 sub new { my $self = shift; bless { id => shift }, $self; }
 
-sub message { $_[0]->{message} }
-sub child { $_[0]->{child} }
-sub next { $_[0]->{next} }
-sub messageid { $_[0]->{id} }
-
 sub add_child {
 	my ($self, $child) = @_;
 	croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 0d54c3d..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -148,7 +148,6 @@ sub mset_thread {
 		my $i = $_;
 		my $m = PublicInbox::SearchMsg->load_doc($i->get_document);
 		$pct{$m->mid} = $i->get_percent;
-		$m = $m->mini_mime;
 		$m;
 	} ($mset->items);
 
@@ -156,9 +155,9 @@ sub mset_thread {
 	$th->thread;
 	if ($q->{r}) { # order by relevance
 		$th->order(sub {
-			[ sort { (eval { $pct{$b->topmost->messageid} } || 0)
+			[ sort { (eval { $pct{$b->topmost->{id}} } || 0)
 					<=>
-				(eval { $pct{$a->topmost->messageid} } || 0)
+				(eval { $pct{$a->topmost->{id}} } || 0)
 			} @{$_[0]} ];
 		});
 	} else { # order by time (default for threaded view)
@@ -185,8 +184,7 @@ sub mset_thread {
 	sub {
 		return unless $msgs;
 		while ($mime = shift @$msgs) {
-			my $mid = mid_clean(mid_mime($mime));
-			$mime = $inbox->msg_by_mid($mid) and last;
+			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
 			$mime = Email::MIME->new($mime);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e90efda..748e391 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -214,12 +214,12 @@ sub _th_index_lite {
 		$rv .= $pad . $irt_map->[1];
 		if ($idx > 0) {
 			my $prev = $siblings->[$idx - 1];
-			my $pmid = $prev->messageid;
+			my $pmid = $prev->{id};
 			if ($idx > 2) {
 				my $s = ($idx - 1). ' preceding siblings ...';
 				$rv .= pad_link($pmid, $level, $s);
 			} elsif ($idx == 2) {
-				my $ppmid = $siblings->[0]->messageid;
+				my $ppmid = $siblings->[0]->{id};
 				$rv .= $pad . $mapping->{$ppmid}->[1];
 			}
 			$rv .= $pad . $mapping->{$pmid}->[1];
@@ -233,25 +233,25 @@ sub _th_index_lite {
 	$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
 	$rv .= "<b>@ $this";
 	my $node = $map->[2];
-	if (my $child = $node->child) {
-		my $cmid = $child->messageid;
+	if (my $child = $node->{child}) {
+		my $cmid = $child->{id};
 		$rv .= $pad . $mapping->{$cmid}->[1];
 		if ($nr_c > 2) {
 			my $s = ($nr_c - 1). ' more replies';
 			$rv .= pad_link($cmid, $level + 1, $s);
-		} elsif (my $cn = $child->next) {
-			$rv .= $pad . $mapping->{$cn->messageid}->[1];
+		} elsif (my $cn = $child->{next}) {
+			$rv .= $pad . $mapping->{$cn->{id}}->[1];
 		}
 	}
-	if (my $next = $node->next) {
-		my $nmid = $next->messageid;
+	if (my $next = $node->{next}) {
+		my $nmid = $next->{id};
 		$rv .= $pad . $mapping->{$nmid}->[1];
 		my $nnext = $nr_s - $idx;
 		if ($nnext > 2) {
 			my $s = ($nnext - 1).' subsequent siblings';
 			$rv .= pad_link($nmid, $level, $s);
-		} elsif (my $nn = $next->next) {
-			$rv .= $pad . $mapping->{$nn->messageid}->[1];
+		} elsif (my $nn = $next->{next}) {
+			$rv .= $pad . $mapping->{$nn->{id}}->[1];
 		}
 	}
 	$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -264,7 +264,7 @@ sub walk_thread {
 		my $level = shift @q;
 		my $node = shift @q or next;
 		$cb->($ctx, $level, $node);
-		unshift @q, $level+1, $node->child, $level, $node->next;
+		unshift @q, $level+1, $node->{child}, $level, $node->{next};
 	}
 }
 
@@ -272,12 +272,12 @@ sub pre_thread  {
 	my ($ctx, $level, $node) = @_;
 	my $mapping = $ctx->{mapping};
 	my $idx = -1;
-	if (my $parent = $node->parent) {
-		my $m = $mapping->{$parent->messageid}->[0];
+	if (my $parent = $node->{parent}) {
+		my $m = $mapping->{$parent->{id}}->[0];
 		$idx = scalar @$m;
 		push @$m, $node;
 	}
-	$mapping->{$node->messageid} = [ [], '', $node, $idx, $level ];
+	$mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
 	skel_dump($ctx, $level, $node);
 }
 
@@ -296,8 +296,8 @@ sub stream_thread ($$) {
 	while (@q) {
 		$level = shift @q;
 		my $node = shift @q or next;
-		unshift @q, $level+1, $node->child, $level, $node->next;
-		$mime = $inbox->msg_by_mid($node->messageid) and last;
+		unshift @q, $level+1, $node->{child}, $level, $node->{next};
+		$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
 	}
 	return missing_thread($ctx) unless $mime;
 
@@ -309,13 +309,14 @@ sub stream_thread ($$) {
 		while (@q) {
 			$level = shift @q;
 			my $node = shift @q or next;
-			unshift @q, $level+1, $node->child, $level, $node->next;
-			my $mid = $node->messageid;
-			if ($mime = $inbox->msg_by_mid($mid)) {
+			unshift @q, $level+1, $node->{child},
+					$level, $node->{next};
+			my $mid = $node->{id};
+			if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
 				$mime = Email::MIME->new($mime);
 				return thread_index_entry($ctx, $level, $mime);
 			} else {
-				return ghost_index_entry($ctx, $level, $mid);
+				return ghost_index_entry($ctx, $level, $node);
 			}
 		}
 		my $ret = join('', thread_adj_level($ctx, 0));
@@ -355,11 +356,11 @@ sub thread_html {
 	$skel .= '</pre>';
 	return stream_thread($th, $ctx) unless $ctx->{flat};
 
-	# flat display: lazy load the full message from mini_mime:
+	# flat display: lazy load the full message from smsg
 	my $inbox = $ctx->{-inbox};
 	my $mime;
 	while ($mime = shift @$msgs) {
-		$mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last;
+		$mime = $inbox->msg_by_smsg($mime) and last;
 	}
 	return missing_thread($ctx) unless $mime;
 	$mime = Email::MIME->new($mime);
@@ -369,8 +370,7 @@ sub thread_html {
 	PublicInbox::WwwStream->response($ctx, 200, sub {
 		return unless $msgs;
 		while ($mime = shift @$msgs) {
-			$mid = mid_clean(mid_mime($mime));
-			$mime = $inbox->msg_by_mid($mid) and last;
+			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
 			$mime = Email::MIME->new($mime);
@@ -738,7 +738,7 @@ sub indent_for {
 sub load_results {
 	my ($sres) = @_;
 
-	[ map { $_->mini_mime } @{delete $sres->{msgs}} ];
+	[ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ];
 }
 
 sub msg_timestamp {
@@ -771,13 +771,13 @@ sub _msg_date {
 sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
 sub _skel_header {
-	my ($ctx, $hdr, $level) = @_;
+	my ($ctx, $smsg, $level) = @_;
 
 	my $dst = $ctx->{dst};
 	my $cur = $ctx->{cur};
-	my $mid = mid_clean($hdr->header_raw('Message-ID'));
-	my $f = ascii_html($hdr->header('X-PI-From'));
-	my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level);
+	my $mid = $smsg->{mid};
+	my $f = ascii_html($smsg->from_name);
+	my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
 	my $attr = $f;
 	$ctx->{first_level} ||= $level;
 
@@ -802,7 +802,7 @@ sub _skel_header {
 	# Subject is never undef, this mail was loaded from
 	# our Xapian which would've resulted in '' if it were
 	# really missing (and Filter rejects empty subjects)
-	my $s = $hdr->header('Subject');
+	my $s = $smsg->subject;
 	my $h = $ctx->{srch}->subject_path($s);
 	if ($ctx->{seen}->{$h}) {
 		$s = undef;
@@ -829,10 +829,10 @@ sub _skel_header {
 
 sub skel_dump {
 	my ($ctx, $level, $node) = @_;
-	if (my $mime = $node->message) {
-		_skel_header($ctx, $mime->header_obj, $level);
+	if (my $smsg = $node->{smsg}) {
+		_skel_header($ctx, $smsg, $level);
 	} else {
-		my $mid = $node->messageid;
+		my $mid = $node->{id};
 		my $dst = $ctx->{dst};
 		my $mapping = $ctx->{mapping};
 		my $map = $mapping->{$mid} if $mapping;
@@ -857,31 +857,24 @@ sub skel_dump {
 
 sub sort_ts {
 	[ sort {
-		(eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
-		(eval { $b->topmost->message->header('X-PI-TS') } || 0)
+		(eval { $a->topmost->{smsg}->ts } || 0) <=>
+		(eval { $b->topmost->{smsg}->ts } || 0)
 	} @{$_[0]} ];
 }
 
-sub _tryload_ghost ($$) {
-	my ($srch, $mid) = @_;
-	my $smsg = $srch->lookup_mail($mid) or return;
-	$smsg->mini_mime;
-}
-
 # accumulate recent topics if search is supported
 # returns 200 if done, 404 if not
 sub acc_topic {
 	my ($ctx, $level, $node) = @_;
 	my $srch = $ctx->{srch};
-	my $mid = $node->messageid;
-	my $x = $node->message || _tryload_ghost($srch, $mid);
+	my $mid = $node->{id};
+	my $x = $node->{smsg} || $srch->lookup_mail($mid);
 	my ($subj, $ts);
 	my $topic;
 	if ($x) {
-		$x = $x->header_obj;
-		$subj = $x->header('Subject') || '';
+		$subj = $x->subject;
 		$subj = $srch->subject_normalized($subj);
-		$ts = $x->header('X-PI-TS');
+		$ts = $x->ts;
 		if ($level == 0) {
 			$topic = [ $ts, 1, { $subj => $mid }, $subj ];
 			$ctx->{-cur_topic} = $topic;
@@ -1023,9 +1016,10 @@ sub thread_adj_level {
 }
 
 sub ghost_index_entry {
-	my ($ctx, $level, $mid) = @_;
+	my ($ctx, $level, $node) = @_;
 	my ($beg, $end) = thread_adj_level($ctx,  $level);
-	$beg . '<pre>'. ghost_parent($ctx->{-upfx}, $mid) . '</pre>' . $end;
+	$beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+		. '</pre>' . $end;
 }
 
 1;
diff --git a/t/search.t b/t/search.t
index cce3b9e..eed9c9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -293,7 +293,7 @@ sub filter_mids {
 	$smsg->ensure_metadata;
 	is($smsg->references, '', "no references created");
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
-	is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten');
+	is($s, $msg->subject, 'long subject not rewritten');
 }
 
 {
@@ -310,10 +310,7 @@ sub filter_mids {
 	my $smsg = $rw->lookup_message('testmessage@example.com');
 	my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
 
-	# mini_mime technically not valid (I think),
-	# but good enough for displaying HTML:
-	is($mime->header('Subject'), $msg->mini_mime->header('Subject'),
-		'UTF-8 subject preserved');
+	is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
 }
 
 {
-- 
EW


  parent reply index

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 01/17] view: remove "subject dummy" references Eric Wong
2016-10-05 23:57 ` [PATCH 02/17] thread: remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 03/17] thread: pass array refs instead of entire arrays Eric Wong
2016-10-05 23:57 ` [PATCH 04/17] thread: remove accessor usage in internals Eric Wong
2016-10-05 23:57 ` [PATCH 05/17] inbox: deal with ghost smsg Eric Wong
2016-10-05 23:57 ` Eric Wong [this message]
2016-10-05 23:57 ` [PATCH 07/17] thread: remove rootset accessor method Eric Wong
2016-10-05 23:57 ` [PATCH 08/17] thread: simplify Eric Wong
2016-10-05 23:57 ` [PATCH 09/17] thread: remove iterate_down Eric Wong
2016-10-05 23:57 ` [PATCH 10/17] thread: avoid incrementing undefined value Eric Wong
2016-10-05 23:57 ` [PATCH 11/17] thread: order_children no longer cares about depth Eric Wong
2016-10-05 23:57 ` [PATCH 12/17] thread: inline and remove recurse_down logic Eric Wong
2016-10-05 23:57 ` [PATCH 13/17] thread: fix sorting without topmost Eric Wong
2016-10-14 21:17   ` [PATCH] thread: reinstates stable ordering when ghosts are present Eric Wong
2016-10-05 23:57 ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
2016-10-05 23:57 ` [PATCH 15/17] view: remove redundant children array in thread views Eric Wong
2016-10-05 23:57 ` [PATCH 16/17] t/thread-cycle: test self-referential messages Eric Wong
2016-10-05 23:57 ` [PATCH 17/17] thread: remove weaken dependency Eric Wong
2016-10-06  8:22 ` [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-13  3:59   ` [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161005235722.14857-7-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox