From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id F26012141F for ; Thu, 10 Jan 2019 21:35:50 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 7/7] view: more culling for search threads Date: Thu, 10 Jan 2019 21:35:49 +0000 Message-Id: <20190110213549.19778-8-e@80x24.org> In-Reply-To: <20190110213549.19778-1-e@80x24.org> References: <20190110213549.19778-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: {mapping} overhead is now down to ~1.3M at the end of a giant thread from hell. --- lib/PublicInbox/Inbox.pm | 5 +++-- lib/PublicInbox/SearchThread.pm | 5 +++++ lib/PublicInbox/View.pm | 10 ++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 73f5761..d57e46d 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -302,8 +302,9 @@ sub smsg_by_mid ($$) { my ($self, $mid) = @_; my $srch = search($self) or return; # favor the Message-ID we used for the NNTP article number: - my $num = mid2num($self, $mid); - defined $num ? $srch->lookup_article($num) : undef; + defined(my $num = mid2num($self, $mid)) or return; + my $smsg = $srch->lookup_article($num) or return; + PublicInbox::SearchMsg::psgi_cull($smsg); } sub msg_by_mid ($$;$) { diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index be29098..931bd57 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -53,6 +53,11 @@ sub _add_message ($$) { my $this = _get_cont_for_id($id_table, $smsg->{mid}); $this->{smsg} = $smsg; + # saves around 4K across 1K messages + # TODO: move this to a more appropriate place, breaks tests + # if we do it during psgi_cull + delete $smsg->{num}; + # B. For each element in the message's References field: defined(my $refs = $smsg->{references}) or return; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 5ddb842..cd125e0 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -219,7 +219,10 @@ sub index_entry { $rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx); my @tocc; my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users - my $mime = delete $smsg->{mime}; # critical to memory use + # deleting {mime} is critical to memory use, + # the rest of the fields saves about 400K as we iterate across 1K msgs + my ($mime) = delete @$smsg{qw(mime ds ts blob subject)}; + my $hdr = $mime->header_obj; my $from = _hdr_names_html($hdr, 'From'); obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx; @@ -311,7 +314,10 @@ sub _th_index_lite { my $nr_s = 0; my $siblings; if (my $smsg = $node->{smsg}) { - ($$irt) = (($smsg->{references} || '') =~ m/<([^>]+)>\z/); + # delete saves about 200KB on a 1K message thread + if (my $refs = delete $smsg->{references}) { + ($$irt) = ($refs =~ m/<([^>]+)>\z/); + } } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { -- EW