* [PATCH 06/17] thread: remove Email::Abstract wrapping
2016-10-05 23:57 7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
@ 2016-10-05 23:57 3% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This roughly doubles performance due to the reduction in
object creation and abstraction layers.
---
lib/PublicInbox/SearchMsg.pm | 29 ------------
lib/PublicInbox/SearchThread.pm | 99 ++++++++++++-----------------------------
lib/PublicInbox/SearchView.pm | 8 ++--
lib/PublicInbox/View.pm | 90 +++++++++++++++++--------------------
t/search.t | 7 +--
5 files changed, 76 insertions(+), 157 deletions(-)
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 9d873c4..9dcc1e6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -144,35 +144,6 @@ sub ensure_metadata {
}
}
-# for threading only
-sub mini_mime {
- my ($self) = @_;
- $self->ensure_metadata;
- my @h = (
- 'Subject' => $self->subject,
- 'X-PI-From' => $self->from_name,
- # prevent Email::Simple::Creator from running,
- # this header is useless for threading as we use X-PI-TS
- # for sorting and display:
- 'Date' => EPOCH_822,
- 'Message-ID' => "<$self->{mid}>",
- 'X-PI-TS' => $self->ts,
- );
- if (my $refs = $self->{references}) {
- push @h, References => $refs;
- }
- my $mime = Email::MIME->create(header => \@h);
- my $h = $mime->header_obj;
-
- # set these headers manually since Encode::encode('MIME-Q', ...)
- # will add spaces to long values when using header_str above.
-
- # drop useless headers Email::MIME set for us
- $h->header_set('Date');
- $h->header_set('MIME-Version');
- $mime;
-}
-
sub mid ($;$) {
my ($self, $mid) = @_;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 53fec97..d39e9b6 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -20,7 +20,6 @@
package PublicInbox::SearchThread;
use strict;
use warnings;
-use Email::Abstract;
sub new {
return bless {
@@ -30,37 +29,6 @@ sub new {
}, $_[0];
}
-sub _get_hdr {
- my ($class, $msg, $hdr) = @_;
- Email::Abstract->get_header($msg, $hdr) || '';
-}
-
-sub _uniq {
- my %seen;
- return grep { !$seen{$_}++ } @_;
-}
-
-sub _references {
- my $class = shift;
- my $msg = shift;
- my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
- my $foo = $class->_get_hdr($msg, "In-Reply-To");
- chomp $foo;
- $foo =~ s/.*?<([^>]+)>.*/$1/;
- push @references, $foo
- if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
- return _uniq(@references);
-}
-
-sub _msgid {
- my ($class, $msg) = @_;
- my $id = $class->_get_hdr($msg, "Message-ID");
- die "attempt to thread message with no id" unless $id;
- chomp $id;
- $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
- return $id;
-}
-
sub rootset { @{$_[0]{rootset}} }
sub thread {
@@ -77,14 +45,11 @@ sub _finish {
delete $self->{seen};
}
-sub _get_cont_for_id {
- my $self = shift;
- my $id = shift;
- $self->{id_table}{$id} ||= $self->_container_class->new($id);
+sub _get_cont_for_id ($$) {
+ my ($self, $mid) = @_;
+ $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
}
-sub _container_class { 'PublicInbox::SearchThread::Container' }
-
sub _setup {
my ($self) = @_;
@@ -92,41 +57,40 @@ sub _setup {
}
sub _add_message ($$) {
- my ($self, $message) = @_;
+ my ($self, $smsg) = @_;
# A. if id_table...
- my $this_container = $self->_get_cont_for_id($self->_msgid($message));
- $this_container->{message} = $message;
+ my $this = _get_cont_for_id($self, $smsg->{mid});
+ $this->{smsg} = $smsg;
# B. For each element in the message's References field:
- my @refs = $self->_references($message);
-
my $prev;
- for my $ref (@refs) {
- # Find a Container object for the given Message-ID
- my $container = $self->_get_cont_for_id($ref);
-
- # Link the References field's Containers together in the
- # order implied by the References header
- # * If they are already linked don't change the existing links
- # * Do not add a link if adding that link would introduce
- # a loop...
-
- if ($prev &&
- !$container->{parent} && # already linked
- !$container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($container);
+ if (defined(my $refs = $smsg->{references})) {
+ foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+ # Find a Container object for the given Message-ID
+ my $cont = _get_cont_for_id($self, $ref);
+
+ # Link the References field's Containers together in
+ # the order implied by the References header
+ #
+ # * If they are already linked don't change the
+ # existing links
+ # * Do not add a link if adding that link would
+ # introduce a loop...
+ if ($prev &&
+ !$cont->{parent} && # already linked
+ !$cont->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($cont);
+ }
+ $prev = $cont;
}
- $prev = $container;
}
# C. Set the parent of this message to be the last element in
# References...
- if ($prev &&
- !$this_container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($this_container)
+ if ($prev && !$this->has_descendent($prev)) { # would loop
+ $prev->add_child($this)
}
}
@@ -135,7 +99,7 @@ sub order {
my $ordersub = shift;
# make a fake root
- my $root = $self->_container_class->new( 'fakeroot' );
+ my $root = _get_cont_for_id($self, 'fakeroot');
$root->add_child( $_ ) for @{ $self->{rootset} };
# sort it
@@ -147,17 +111,12 @@ sub order {
$root->remove_child($_) for @$kids;
}
-package PublicInbox::SearchThread::Container;
+package PublicInbox::SearchThread::Msg;
use Carp qw(croak);
use Scalar::Util qw(weaken);
sub new { my $self = shift; bless { id => shift }, $self; }
-sub message { $_[0]->{message} }
-sub child { $_[0]->{child} }
-sub next { $_[0]->{next} }
-sub messageid { $_[0]->{id} }
-
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 0d54c3d..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -148,7 +148,6 @@ sub mset_thread {
my $i = $_;
my $m = PublicInbox::SearchMsg->load_doc($i->get_document);
$pct{$m->mid} = $i->get_percent;
- $m = $m->mini_mime;
$m;
} ($mset->items);
@@ -156,9 +155,9 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- [ sort { (eval { $pct{$b->topmost->messageid} } || 0)
+ [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
<=>
- (eval { $pct{$a->topmost->messageid} } || 0)
+ (eval { $pct{$a->topmost->{id}} } || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
@@ -185,8 +184,7 @@ sub mset_thread {
sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- my $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e90efda..748e391 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -214,12 +214,12 @@ sub _th_index_lite {
$rv .= $pad . $irt_map->[1];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
- my $pmid = $prev->messageid;
+ my $pmid = $prev->{id};
if ($idx > 2) {
my $s = ($idx - 1). ' preceding siblings ...';
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
- my $ppmid = $siblings->[0]->messageid;
+ my $ppmid = $siblings->[0]->{id};
$rv .= $pad . $mapping->{$ppmid}->[1];
}
$rv .= $pad . $mapping->{$pmid}->[1];
@@ -233,25 +233,25 @@ sub _th_index_lite {
$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
$rv .= "<b>@ $this";
my $node = $map->[2];
- if (my $child = $node->child) {
- my $cmid = $child->messageid;
+ if (my $child = $node->{child}) {
+ my $cmid = $child->{id};
$rv .= $pad . $mapping->{$cmid}->[1];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
- } elsif (my $cn = $child->next) {
- $rv .= $pad . $mapping->{$cn->messageid}->[1];
+ } elsif (my $cn = $child->{next}) {
+ $rv .= $pad . $mapping->{$cn->{id}}->[1];
}
}
- if (my $next = $node->next) {
- my $nmid = $next->messageid;
+ if (my $next = $node->{next}) {
+ my $nmid = $next->{id};
$rv .= $pad . $mapping->{$nmid}->[1];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
- } elsif (my $nn = $next->next) {
- $rv .= $pad . $mapping->{$nn->messageid}->[1];
+ } elsif (my $nn = $next->{next}) {
+ $rv .= $pad . $mapping->{$nn->{id}}->[1];
}
}
$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -264,7 +264,7 @@ sub walk_thread {
my $level = shift @q;
my $node = shift @q or next;
$cb->($ctx, $level, $node);
- unshift @q, $level+1, $node->child, $level, $node->next;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
}
}
@@ -272,12 +272,12 @@ sub pre_thread {
my ($ctx, $level, $node) = @_;
my $mapping = $ctx->{mapping};
my $idx = -1;
- if (my $parent = $node->parent) {
- my $m = $mapping->{$parent->messageid}->[0];
+ if (my $parent = $node->{parent}) {
+ my $m = $mapping->{$parent->{id}}->[0];
$idx = scalar @$m;
push @$m, $node;
}
- $mapping->{$node->messageid} = [ [], '', $node, $idx, $level ];
+ $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
skel_dump($ctx, $level, $node);
}
@@ -296,8 +296,8 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- $mime = $inbox->msg_by_mid($node->messageid) and last;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
+ $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
}
return missing_thread($ctx) unless $mime;
@@ -309,13 +309,14 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- my $mid = $node->messageid;
- if ($mime = $inbox->msg_by_mid($mid)) {
+ unshift @q, $level+1, $node->{child},
+ $level, $node->{next};
+ my $mid = $node->{id};
+ if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
$mime = Email::MIME->new($mime);
return thread_index_entry($ctx, $level, $mime);
} else {
- return ghost_index_entry($ctx, $level, $mid);
+ return ghost_index_entry($ctx, $level, $node);
}
}
my $ret = join('', thread_adj_level($ctx, 0));
@@ -355,11 +356,11 @@ sub thread_html {
$skel .= '</pre>';
return stream_thread($th, $ctx) unless $ctx->{flat};
- # flat display: lazy load the full message from mini_mime:
+ # flat display: lazy load the full message from smsg
my $inbox = $ctx->{-inbox};
my $mime;
while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
return missing_thread($ctx) unless $mime;
$mime = Email::MIME->new($mime);
@@ -369,8 +370,7 @@ sub thread_html {
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
@@ -738,7 +738,7 @@ sub indent_for {
sub load_results {
my ($sres) = @_;
- [ map { $_->mini_mime } @{delete $sres->{msgs}} ];
+ [ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ];
}
sub msg_timestamp {
@@ -771,13 +771,13 @@ sub _msg_date {
sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
sub _skel_header {
- my ($ctx, $hdr, $level) = @_;
+ my ($ctx, $smsg, $level) = @_;
my $dst = $ctx->{dst};
my $cur = $ctx->{cur};
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
- my $f = ascii_html($hdr->header('X-PI-From'));
- my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level);
+ my $mid = $smsg->{mid};
+ my $f = ascii_html($smsg->from_name);
+ my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
my $attr = $f;
$ctx->{first_level} ||= $level;
@@ -802,7 +802,7 @@ sub _skel_header {
# Subject is never undef, this mail was loaded from
# our Xapian which would've resulted in '' if it were
# really missing (and Filter rejects empty subjects)
- my $s = $hdr->header('Subject');
+ my $s = $smsg->subject;
my $h = $ctx->{srch}->subject_path($s);
if ($ctx->{seen}->{$h}) {
$s = undef;
@@ -829,10 +829,10 @@ sub _skel_header {
sub skel_dump {
my ($ctx, $level, $node) = @_;
- if (my $mime = $node->message) {
- _skel_header($ctx, $mime->header_obj, $level);
+ if (my $smsg = $node->{smsg}) {
+ _skel_header($ctx, $smsg, $level);
} else {
- my $mid = $node->messageid;
+ my $mid = $node->{id};
my $dst = $ctx->{dst};
my $mapping = $ctx->{mapping};
my $map = $mapping->{$mid} if $mapping;
@@ -857,31 +857,24 @@ sub skel_dump {
sub sort_ts {
[ sort {
- (eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
- (eval { $b->topmost->message->header('X-PI-TS') } || 0)
+ (eval { $a->topmost->{smsg}->ts } || 0) <=>
+ (eval { $b->topmost->{smsg}->ts } || 0)
} @{$_[0]} ];
}
-sub _tryload_ghost ($$) {
- my ($srch, $mid) = @_;
- my $smsg = $srch->lookup_mail($mid) or return;
- $smsg->mini_mime;
-}
-
# accumulate recent topics if search is supported
# returns 200 if done, 404 if not
sub acc_topic {
my ($ctx, $level, $node) = @_;
my $srch = $ctx->{srch};
- my $mid = $node->messageid;
- my $x = $node->message || _tryload_ghost($srch, $mid);
+ my $mid = $node->{id};
+ my $x = $node->{smsg} || $srch->lookup_mail($mid);
my ($subj, $ts);
my $topic;
if ($x) {
- $x = $x->header_obj;
- $subj = $x->header('Subject') || '';
+ $subj = $x->subject;
$subj = $srch->subject_normalized($subj);
- $ts = $x->header('X-PI-TS');
+ $ts = $x->ts;
if ($level == 0) {
$topic = [ $ts, 1, { $subj => $mid }, $subj ];
$ctx->{-cur_topic} = $topic;
@@ -1023,9 +1016,10 @@ sub thread_adj_level {
}
sub ghost_index_entry {
- my ($ctx, $level, $mid) = @_;
+ my ($ctx, $level, $node) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $mid) . '</pre>' . $end;
+ $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+ . '</pre>' . $end;
}
1;
diff --git a/t/search.t b/t/search.t
index cce3b9e..eed9c9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -293,7 +293,7 @@ sub filter_mids {
$smsg->ensure_metadata;
is($smsg->references, '', "no references created");
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten');
+ is($s, $msg->subject, 'long subject not rewritten');
}
{
@@ -310,10 +310,7 @@ sub filter_mids {
my $smsg = $rw->lookup_message('testmessage@example.com');
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- # mini_mime technically not valid (I think),
- # but good enough for displaying HTML:
- is($mime->header('Subject'), $msg->mini_mime->header('Subject'),
- 'UTF-8 subject preserved');
+ is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
}
{
--
EW
^ permalink raw reply related [relevance 3%]
* [PATCH 0/17] remove Mail::Thread dependency
@ 2016-10-05 23:57 7% Eric Wong
2016-10-05 23:57 3% ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This greatly reduces the amount of code we need to load while
reducing abstractions which slow us down and hurt memory use
when displaying gigantic threads.
More may be done and we may use SearchMsg directly for threading
in the future and obviate the need for the container
abstraction.
Eric Wong (17):
view: remove "subject dummy" references
thread: remove Mail::Thread dependency
thread: pass array refs instead of entire arrays
thread: remove accessor usage in internals
inbox: deal with ghost smsg
thread: remove Email::Abstract wrapping
thread: remove rootset accessor method
thread: simplify
thread: remove iterate_down
thread: avoid incrementing undefined value
thread: order_children no longer cares about depth
thread: inline and remove recurse_down logic
thread: fix sorting without topmost
thread: use hash + array instead of hand-rolled linked list
view: remove redundant children array in thread views
t/thread-cycle: test self-referential messages
thread: remove weaken dependency
INSTALL | 1 -
MANIFEST | 3 +-
Makefile.PL | 1 -
lib/PublicInbox/Inbox.pm | 2 +
lib/PublicInbox/SearchIdx.pm | 4 +-
lib/PublicInbox/SearchMsg.pm | 29 -------
lib/PublicInbox/SearchThread.pm | 147 +++++++++++++++++++++++++++++++++++
lib/PublicInbox/SearchView.pm | 15 ++--
lib/PublicInbox/Thread.pm | 86 ---------------------
lib/PublicInbox/View.pm | 165 ++++++++++++++++++----------------------
lib/PublicInbox/WWW.pm | 2 +-
t/plack.t | 3 +-
t/search.t | 7 +-
t/thread-cycle.t | 97 +++++++++++++++++++++++
14 files changed, 333 insertions(+), 229 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2016-10-05 23:57 7% [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 3% ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).