From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 5A1E7209B9 for ; Wed, 5 Oct 2016 23:57:26 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/17] thread: remove Email::Abstract wrapping Date: Wed, 5 Oct 2016 23:57:11 +0000 Message-Id: <20161005235722.14857-7-e@80x24.org> In-Reply-To: <20161005235722.14857-1-e@80x24.org> References: <20161005235722.14857-1-e@80x24.org> List-Id: This roughly doubles performance due to the reduction in object creation and abstraction layers. --- lib/PublicInbox/SearchMsg.pm | 29 ------------ lib/PublicInbox/SearchThread.pm | 99 ++++++++++++----------------------------- lib/PublicInbox/SearchView.pm | 8 ++-- lib/PublicInbox/View.pm | 90 +++++++++++++++++-------------------- t/search.t | 7 +-- 5 files changed, 76 insertions(+), 157 deletions(-) diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 9d873c4..9dcc1e6 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -144,35 +144,6 @@ sub ensure_metadata { } } -# for threading only -sub mini_mime { - my ($self) = @_; - $self->ensure_metadata; - my @h = ( - 'Subject' => $self->subject, - 'X-PI-From' => $self->from_name, - # prevent Email::Simple::Creator from running, - # this header is useless for threading as we use X-PI-TS - # for sorting and display: - 'Date' => EPOCH_822, - 'Message-ID' => "<$self->{mid}>", - 'X-PI-TS' => $self->ts, - ); - if (my $refs = $self->{references}) { - push @h, References => $refs; - } - my $mime = Email::MIME->create(header => \@h); - my $h = $mime->header_obj; - - # set these headers manually since Encode::encode('MIME-Q', ...) - # will add spaces to long values when using header_str above. - - # drop useless headers Email::MIME set for us - $h->header_set('Date'); - $h->header_set('MIME-Version'); - $mime; -} - sub mid ($;$) { my ($self, $mid) = @_; diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 53fec97..d39e9b6 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -20,7 +20,6 @@ package PublicInbox::SearchThread; use strict; use warnings; -use Email::Abstract; sub new { return bless { @@ -30,37 +29,6 @@ sub new { }, $_[0]; } -sub _get_hdr { - my ($class, $msg, $hdr) = @_; - Email::Abstract->get_header($msg, $hdr) || ''; -} - -sub _uniq { - my %seen; - return grep { !$seen{$_}++ } @_; -} - -sub _references { - my $class = shift; - my $msg = shift; - my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g); - my $foo = $class->_get_hdr($msg, "In-Reply-To"); - chomp $foo; - $foo =~ s/.*?<([^>]+)>.*/$1/; - push @references, $foo - if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo); - return _uniq(@references); -} - -sub _msgid { - my ($class, $msg) = @_; - my $id = $class->_get_hdr($msg, "Message-ID"); - die "attempt to thread message with no id" unless $id; - chomp $id; - $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s - return $id; -} - sub rootset { @{$_[0]{rootset}} } sub thread { @@ -77,14 +45,11 @@ sub _finish { delete $self->{seen}; } -sub _get_cont_for_id { - my $self = shift; - my $id = shift; - $self->{id_table}{$id} ||= $self->_container_class->new($id); +sub _get_cont_for_id ($$) { + my ($self, $mid) = @_; + $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid); } -sub _container_class { 'PublicInbox::SearchThread::Container' } - sub _setup { my ($self) = @_; @@ -92,41 +57,40 @@ sub _setup { } sub _add_message ($$) { - my ($self, $message) = @_; + my ($self, $smsg) = @_; # A. if id_table... - my $this_container = $self->_get_cont_for_id($self->_msgid($message)); - $this_container->{message} = $message; + my $this = _get_cont_for_id($self, $smsg->{mid}); + $this->{smsg} = $smsg; # B. For each element in the message's References field: - my @refs = $self->_references($message); - my $prev; - for my $ref (@refs) { - # Find a Container object for the given Message-ID - my $container = $self->_get_cont_for_id($ref); - - # Link the References field's Containers together in the - # order implied by the References header - # * If they are already linked don't change the existing links - # * Do not add a link if adding that link would introduce - # a loop... - - if ($prev && - !$container->{parent} && # already linked - !$container->has_descendent($prev) # would loop - ) { - $prev->add_child($container); + if (defined(my $refs = $smsg->{references})) { + foreach my $ref ($refs =~ m/<([^>]+)>/g) { + # Find a Container object for the given Message-ID + my $cont = _get_cont_for_id($self, $ref); + + # Link the References field's Containers together in + # the order implied by the References header + # + # * If they are already linked don't change the + # existing links + # * Do not add a link if adding that link would + # introduce a loop... + if ($prev && + !$cont->{parent} && # already linked + !$cont->has_descendent($prev) # would loop + ) { + $prev->add_child($cont); + } + $prev = $cont; } - $prev = $container; } # C. Set the parent of this message to be the last element in # References... - if ($prev && - !$this_container->has_descendent($prev) # would loop - ) { - $prev->add_child($this_container) + if ($prev && !$this->has_descendent($prev)) { # would loop + $prev->add_child($this) } } @@ -135,7 +99,7 @@ sub order { my $ordersub = shift; # make a fake root - my $root = $self->_container_class->new( 'fakeroot' ); + my $root = _get_cont_for_id($self, 'fakeroot'); $root->add_child( $_ ) for @{ $self->{rootset} }; # sort it @@ -147,17 +111,12 @@ sub order { $root->remove_child($_) for @$kids; } -package PublicInbox::SearchThread::Container; +package PublicInbox::SearchThread::Msg; use Carp qw(croak); use Scalar::Util qw(weaken); sub new { my $self = shift; bless { id => shift }, $self; } -sub message { $_[0]->{message} } -sub child { $_[0]->{child} } -sub next { $_[0]->{next} } -sub messageid { $_[0]->{id} } - sub add_child { my ($self, $child) = @_; croak "Cowardly refusing to become my own parent: $self" diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 0d54c3d..ebeb41f 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -148,7 +148,6 @@ sub mset_thread { my $i = $_; my $m = PublicInbox::SearchMsg->load_doc($i->get_document); $pct{$m->mid} = $i->get_percent; - $m = $m->mini_mime; $m; } ($mset->items); @@ -156,9 +155,9 @@ sub mset_thread { $th->thread; if ($q->{r}) { # order by relevance $th->order(sub { - [ sort { (eval { $pct{$b->topmost->messageid} } || 0) + [ sort { (eval { $pct{$b->topmost->{id}} } || 0) <=> - (eval { $pct{$a->topmost->messageid} } || 0) + (eval { $pct{$a->topmost->{id}} } || 0) } @{$_[0]} ]; }); } else { # order by time (default for threaded view) @@ -185,8 +184,7 @@ sub mset_thread { sub { return unless $msgs; while ($mime = shift @$msgs) { - my $mid = mid_clean(mid_mime($mime)); - $mime = $inbox->msg_by_mid($mid) and last; + $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { $mime = Email::MIME->new($mime); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index e90efda..748e391 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -214,12 +214,12 @@ sub _th_index_lite { $rv .= $pad . $irt_map->[1]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; - my $pmid = $prev->messageid; + my $pmid = $prev->{id}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { - my $ppmid = $siblings->[0]->messageid; + my $ppmid = $siblings->[0]->{id}; $rv .= $pad . $mapping->{$ppmid}->[1]; } $rv .= $pad . $mapping->{$pmid}->[1]; @@ -233,25 +233,25 @@ sub _th_index_lite { $this =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $this"; my $node = $map->[2]; - if (my $child = $node->child) { - my $cmid = $child->messageid; + if (my $child = $node->{child}) { + my $cmid = $child->{id}; $rv .= $pad . $mapping->{$cmid}->[1]; if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); - } elsif (my $cn = $child->next) { - $rv .= $pad . $mapping->{$cn->messageid}->[1]; + } elsif (my $cn = $child->{next}) { + $rv .= $pad . $mapping->{$cn->{id}}->[1]; } } - if (my $next = $node->next) { - my $nmid = $next->messageid; + if (my $next = $node->{next}) { + my $nmid = $next->{id}; $rv .= $pad . $mapping->{$nmid}->[1]; my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); - } elsif (my $nn = $next->next) { - $rv .= $pad . $mapping->{$nn->messageid}->[1]; + } elsif (my $nn = $next->{next}) { + $rv .= $pad . $mapping->{$nn->{id}}->[1]; } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; @@ -264,7 +264,7 @@ sub walk_thread { my $level = shift @q; my $node = shift @q or next; $cb->($ctx, $level, $node); - unshift @q, $level+1, $node->child, $level, $node->next; + unshift @q, $level+1, $node->{child}, $level, $node->{next}; } } @@ -272,12 +272,12 @@ sub pre_thread { my ($ctx, $level, $node) = @_; my $mapping = $ctx->{mapping}; my $idx = -1; - if (my $parent = $node->parent) { - my $m = $mapping->{$parent->messageid}->[0]; + if (my $parent = $node->{parent}) { + my $m = $mapping->{$parent->{id}}->[0]; $idx = scalar @$m; push @$m, $node; } - $mapping->{$node->messageid} = [ [], '', $node, $idx, $level ]; + $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } @@ -296,8 +296,8 @@ sub stream_thread ($$) { while (@q) { $level = shift @q; my $node = shift @q or next; - unshift @q, $level+1, $node->child, $level, $node->next; - $mime = $inbox->msg_by_mid($node->messageid) and last; + unshift @q, $level+1, $node->{child}, $level, $node->{next}; + $mime = $inbox->msg_by_smsg($node->{smsg}) and last; } return missing_thread($ctx) unless $mime; @@ -309,13 +309,14 @@ sub stream_thread ($$) { while (@q) { $level = shift @q; my $node = shift @q or next; - unshift @q, $level+1, $node->child, $level, $node->next; - my $mid = $node->messageid; - if ($mime = $inbox->msg_by_mid($mid)) { + unshift @q, $level+1, $node->{child}, + $level, $node->{next}; + my $mid = $node->{id}; + if ($mime = $inbox->msg_by_smsg($node->{smsg})) { $mime = Email::MIME->new($mime); return thread_index_entry($ctx, $level, $mime); } else { - return ghost_index_entry($ctx, $level, $mid); + return ghost_index_entry($ctx, $level, $node); } } my $ret = join('', thread_adj_level($ctx, 0)); @@ -355,11 +356,11 @@ sub thread_html { $skel .= ''; return stream_thread($th, $ctx) unless $ctx->{flat}; - # flat display: lazy load the full message from mini_mime: + # flat display: lazy load the full message from smsg my $inbox = $ctx->{-inbox}; my $mime; while ($mime = shift @$msgs) { - $mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last; + $mime = $inbox->msg_by_smsg($mime) and last; } return missing_thread($ctx) unless $mime; $mime = Email::MIME->new($mime); @@ -369,8 +370,7 @@ sub thread_html { PublicInbox::WwwStream->response($ctx, 200, sub { return unless $msgs; while ($mime = shift @$msgs) { - $mid = mid_clean(mid_mime($mime)); - $mime = $inbox->msg_by_mid($mid) and last; + $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { $mime = Email::MIME->new($mime); @@ -738,7 +738,7 @@ sub indent_for { sub load_results { my ($sres) = @_; - [ map { $_->mini_mime } @{delete $sres->{msgs}} ]; + [ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ]; } sub msg_timestamp { @@ -771,13 +771,13 @@ sub _msg_date { sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } sub _skel_header { - my ($ctx, $hdr, $level) = @_; + my ($ctx, $smsg, $level) = @_; my $dst = $ctx->{dst}; my $cur = $ctx->{cur}; - my $mid = mid_clean($hdr->header_raw('Message-ID')); - my $f = ascii_html($hdr->header('X-PI-From')); - my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level); + my $mid = $smsg->{mid}; + my $f = ascii_html($smsg->from_name); + my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level); my $attr = $f; $ctx->{first_level} ||= $level; @@ -802,7 +802,7 @@ sub _skel_header { # Subject is never undef, this mail was loaded from # our Xapian which would've resulted in '' if it were # really missing (and Filter rejects empty subjects) - my $s = $hdr->header('Subject'); + my $s = $smsg->subject; my $h = $ctx->{srch}->subject_path($s); if ($ctx->{seen}->{$h}) { $s = undef; @@ -829,10 +829,10 @@ sub _skel_header { sub skel_dump { my ($ctx, $level, $node) = @_; - if (my $mime = $node->message) { - _skel_header($ctx, $mime->header_obj, $level); + if (my $smsg = $node->{smsg}) { + _skel_header($ctx, $smsg, $level); } else { - my $mid = $node->messageid; + my $mid = $node->{id}; my $dst = $ctx->{dst}; my $mapping = $ctx->{mapping}; my $map = $mapping->{$mid} if $mapping; @@ -857,31 +857,24 @@ sub skel_dump { sub sort_ts { [ sort { - (eval { $a->topmost->message->header('X-PI-TS') } || 0) <=> - (eval { $b->topmost->message->header('X-PI-TS') } || 0) + (eval { $a->topmost->{smsg}->ts } || 0) <=> + (eval { $b->topmost->{smsg}->ts } || 0) } @{$_[0]} ]; } -sub _tryload_ghost ($$) { - my ($srch, $mid) = @_; - my $smsg = $srch->lookup_mail($mid) or return; - $smsg->mini_mime; -} - # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { my ($ctx, $level, $node) = @_; my $srch = $ctx->{srch}; - my $mid = $node->messageid; - my $x = $node->message || _tryload_ghost($srch, $mid); + my $mid = $node->{id}; + my $x = $node->{smsg} || $srch->lookup_mail($mid); my ($subj, $ts); my $topic; if ($x) { - $x = $x->header_obj; - $subj = $x->header('Subject') || ''; + $subj = $x->subject; $subj = $srch->subject_normalized($subj); - $ts = $x->header('X-PI-TS'); + $ts = $x->ts; if ($level == 0) { $topic = [ $ts, 1, { $subj => $mid }, $subj ]; $ctx->{-cur_topic} = $topic; @@ -1023,9 +1016,10 @@ sub thread_adj_level { } sub ghost_index_entry { - my ($ctx, $level, $mid) = @_; + my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . '
'. ghost_parent($ctx->{-upfx}, $mid) . '
' . $end; + $beg . '
'. ghost_parent($ctx->{-upfx}, $node->{id})
+		. '
' . $end; } 1; diff --git a/t/search.t b/t/search.t index cce3b9e..eed9c9b 100644 --- a/t/search.t +++ b/t/search.t @@ -293,7 +293,7 @@ sub filter_mids { $smsg->ensure_metadata; is($smsg->references, '', "no references created"); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); - is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten'); + is($s, $msg->subject, 'long subject not rewritten'); } { @@ -310,10 +310,7 @@ sub filter_mids { my $smsg = $rw->lookup_message('testmessage@example.com'); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); - # mini_mime technically not valid (I think), - # but good enough for displaying HTML: - is($mime->header('Subject'), $msg->mini_mime->header('Subject'), - 'UTF-8 subject preserved'); + is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved'); } { -- EW