user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 08/13] www: remove smsg_mime API and adjust callers
  2020-06-01 10:06  7% [PATCH 00/13] smsg: remove tricky {mime} field Eric Wong
@ 2020-06-01 10:06  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-06-01 10:06 UTC (permalink / raw)
  To: meta

To further simplify callers and avoid embarrasing memory
explosions[1], we can finally eliminate this method in
favor of smsg_eml.

[1] commit 7d02b9e64455831d3bda20cd2e64e0c15dc07df5
    ("view: stop storing all MIME objects on large threads")
    fixed a huge memory blowup.
---
 Documentation/mknews.perl     |  7 ++--
 lib/PublicInbox/Feed.pm       |  6 ++--
 lib/PublicInbox/Inbox.pm      | 12 ++-----
 lib/PublicInbox/SearchView.pm |  4 +--
 lib/PublicInbox/Smsg.pm       |  7 ++--
 lib/PublicInbox/View.pm       | 63 +++++++++++++++++------------------
 t/v2mda.t                     |  4 +--
 7 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl
index 3bdebfce7a5..965c30c1dcb 100755
--- a/Documentation/mknews.perl
+++ b/Documentation/mknews.perl
@@ -102,9 +102,10 @@ sub mime2txt {
 }
 
 sub mime2html {
-	my ($out, $mime, $ctx) = @_;
-	my $smsg = bless { mime => $mime }, 'PublicInbox::Smsg';
-	print $out PublicInbox::View::index_entry($smsg, $ctx, 1) or die;
+	my ($out, $eml, $ctx) = @_;
+	my $smsg = bless {}, 'PublicInbox::Smsg';
+	$smsg->populate($eml);
+	print $out PublicInbox::View::eml_entry($ctx, $smsg, $eml, 1) or die;
 }
 
 sub html_start {
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index b770a35077c..4c1056b4665 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -53,9 +53,9 @@ sub new_html_i {
 	my ($nr, $ctx) = @_;
 	my $msgs = $ctx->{msgs};
 	while (my $smsg = shift @$msgs) {
-		my $m = $ctx->{-inbox}->smsg_mime($smsg) or next;
-		my $more = scalar @$msgs;
-		return PublicInbox::View::index_entry($m, $ctx, $more);
+		my $eml = $ctx->{-inbox}->smsg_eml($smsg) or next;
+		return PublicInbox::View::eml_entry($ctx, $smsg, $eml,
+							scalar @$msgs);
 	}
 	PublicInbox::View::pagination_footer($ctx, './new.html');
 }
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 38abdfe5847..af034358b15 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -326,18 +326,12 @@ sub msg_by_smsg ($$) {
 	git($self)->cat_file($blob);
 }
 
-sub smsg_mime {
-	my ($self, $smsg) = @_;
-	if (my $s = msg_by_smsg($self, $smsg)) {
-		$smsg->{mime} = PublicInbox::Eml->new($s);
-		return $smsg;
-	}
-}
-
 sub smsg_eml {
 	my ($self, $smsg) = @_;
 	my $bref = msg_by_smsg($self, $smsg) or return;
-	PublicInbox::Eml->new($bref);
+	my $eml = PublicInbox::Eml->new($bref);
+	$smsg->populate($eml) unless exists($smsg->{num}); # v1 w/o SQLite
+	$eml;
 }
 
 sub mid2num($$) {
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 249cf53926d..d53a533e53c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -300,8 +300,8 @@ sub mset_thread_i {
 	my ($nr, $ctx) = @_;
 	my $msgs = $ctx->{msgs} or return;
 	while (my $smsg = pop @$msgs) {
-		$ctx->{-inbox}->smsg_mime($smsg) or next;
-		return PublicInbox::View::index_entry($smsg, $ctx,
+		my $eml = $ctx->{-inbox}->smsg_eml($smsg) or next;
+		return PublicInbox::View::eml_entry($ctx, $smsg, $eml,
 							scalar @$msgs);
 	}
 	my ($skel) = delete @$ctx{qw(skel msgs)};
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 446bca81b48..9688c5592a2 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -12,7 +12,7 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw/mid_mime/;
+use PublicInbox::MID qw(mid_mime mids);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 use Time::Local qw(timegm);
@@ -105,7 +105,7 @@ sub __hdr ($$) {
 	};
 }
 
-# for Import and v1 WWW code paths
+# for Import and v1 non-SQLite WWW code paths
 sub populate {
 	my ($self, $hdr, $v2w) = @_;
 	for my $f (qw(From To Cc Subject)) {
@@ -133,6 +133,9 @@ sub populate {
 	$self->{-ts} = [ my @ts = msg_timestamp($hdr, $v2w->{cotime}) ];
 	$self->{ds} //= $ds[0]; # no zone
 	$self->{ts} //= $ts[0];
+
+	# for v1 users w/o SQLite
+	$self->{mid} //= eval { mids($hdr)->[0] } // '';
 }
 
 sub subject ($) { __hdr($_[0], 'Subject') }
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a05ac4142f2..0bc2b06e4dc 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -74,11 +74,10 @@ sub msg_page_more { # cold
 	my $ibx = $ctx->{-inbox};
 	my $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
 	$ctx->{more} = [ $id, $prev, $next ] if $next;
-	$smsg = $ibx->smsg_mime($smsg) or return '';
+	my $eml = $ibx->smsg_eml($smsg) or return '';
 	$ctx->{mhref} = '../' . mid_href($smsg->{mid}) . '/';
-	my $mime = delete $smsg->{mime};
-	$ctx->{obuf} = _msg_page_prepare_obuf($mime->header_obj, $ctx, $nr);
-	multipart_text_as_html($mime, $ctx);
+	$ctx->{obuf} = _msg_page_prepare_obuf($eml->header_obj, $ctx, $nr);
+	multipart_text_as_html($eml, $ctx);
 	${delete $ctx->{obuf}} .= '</pre><hr>';
 }
 
@@ -181,14 +180,14 @@ sub nr_to_s ($$$) {
 # human-friendly format
 sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
+# Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint
 # this is already inside a <pre>
-sub index_entry {
-	my ($smsg, $ctx, $more) = @_;
-	my $subj = $smsg->subject;
-	my $mid_raw = $smsg->mid;
+sub eml_entry {
+	my ($ctx, $smsg, $eml, $more) = @_;
+	my $subj = delete $smsg->{subject};
+	my $mid_raw = $smsg->{mid};
 	my $id = id_compress($mid_raw, 1);
 	my $id_m = 'm'.$id;
-
 	my $root_anchor = $ctx->{root_anchor} || '';
 	my $irt;
 	my $obfs_ibx = $ctx->{-obfs_ibx};
@@ -201,12 +200,12 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
-	my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users
-	# deleting {mime} is critical to memory use,
-	# the rest of the fields saves about 400K as we iterate across 1K msgs
-	my ($mime) = delete @$smsg{qw(mime ds ts blob subject)};
+	my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
+
+	# Deleting these fields saves about 400K as we iterate across 1K msgs
+	delete @$smsg{qw(ts blob)};
 
-	my $hdr = $mime->header_obj;
+	my $hdr = $eml->header_obj;
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
 	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
@@ -244,7 +243,7 @@ sub index_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{obuf} = \$rv;
-	$mime->each_part(\&add_text_body, $ctx, 1);
+	$eml->each_part(\&add_text_body, $ctx, 1);
 	delete $ctx->{obuf};
 
 	# add the footer
@@ -372,10 +371,10 @@ sub pre_thread  { # walk_thread callback
 	skel_dump($ctx, $level, $node);
 }
 
-sub thread_index_entry {
-	my ($ctx, $level, $smsg) = @_;
+sub thread_eml_entry {
+	my ($ctx, $level, $smsg, $eml) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
+	$beg . '<pre>' . eml_entry($ctx, $smsg, $eml, 0) . '</pre>' . $end;
 }
 
 sub stream_thread_i { # PublicInbox::WwwStream::getline callback
@@ -387,8 +386,8 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
 		my $node = shift @$q or next;
 		my $cl = $level + 1;
 		unshift @$q, map { ($cl, $_) } @{$node->{children}};
-		if ($ctx->{-inbox}->smsg_mime($node)) {
-			return thread_index_entry($ctx, $level, $node);
+		if (my $eml = $ctx->{-inbox}->smsg_eml($node)) {
+			return thread_eml_entry($ctx, $level, $node, $eml);
 		} else {
 			return ghost_index_entry($ctx, $level, $node);
 		}
@@ -400,19 +399,19 @@ sub stream_thread ($$) {
 	my ($rootset, $ctx) = @_;
 	my $ibx = $ctx->{-inbox};
 	my @q = map { (0, $_) } @$rootset;
-	my ($smsg, $level);
+	my ($smsg, $eml, $level);
 	while (@q) {
 		$level = shift @q;
-		my $node = shift @q or next;
+		$smsg = shift @q or next;
 		my $cl = $level + 1;
-		unshift @q, map { ($cl, $_) } @{$node->{children}};
-		$smsg = $ibx->smsg_mime($node) and last;
+		unshift @q, map { ($cl, $_) } @{$smsg->{children}};
+		$eml = $ibx->smsg_eml($smsg) and last;
 	}
-	return missing_thread($ctx) unless $smsg;
+	return missing_thread($ctx) unless $eml;
 
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	$ctx->{-title_html} = ascii_html($smsg->{subject});
-	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
+	$ctx->{-html_tip} = thread_eml_entry($ctx, $level, $smsg, $eml);
 	$ctx->{-queue} = \@q;
 	PublicInbox::WwwStream->response($ctx, 200, \&stream_thread_i);
 }
@@ -452,13 +451,13 @@ sub thread_html {
 	return stream_thread($rootset, $ctx) unless $ctx->{flat};
 
 	# flat display: lazy load the full message from smsg
-	my $smsg;
-	while (my $m = shift @$msgs) {
-		$smsg = $ibx->smsg_mime($m) and last;
+	my ($smsg, $eml);
+	while ($smsg = shift @$msgs) {
+		$eml = $ibx->smsg_eml($smsg) and last;
 	}
 	return missing_thread($ctx) unless $smsg;
 	$ctx->{-title_html} = ascii_html($smsg->{subject});
-	$ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+	$ctx->{-html_tip} = '<pre>'.eml_entry($ctx, $smsg, $eml, scalar @$msgs);
 	$ctx->{msgs} = $msgs;
 	PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
 }
@@ -467,8 +466,8 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 	my ($nr, $ctx) = @_;
 	my $msgs = $ctx->{msgs} or return;
 	while (my $smsg = shift @$msgs) {
-		$ctx->{-inbox}->smsg_mime($smsg) or next;
-		return index_entry($smsg, $ctx, scalar @$msgs);
+		my $eml = $ctx->{-inbox}->smsg_eml($smsg) or next;
+		return eml_entry($ctx, $smsg, $eml, scalar @$msgs);
 	}
 	my ($skel) = delete @$ctx{qw(skel msgs)};
 	$$skel;
diff --git a/t/v2mda.t b/t/v2mda.t
index 36f43ff096c..7666eb2dacd 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -52,8 +52,8 @@ if ($V == 1) {
 }
 my $msgs = $ibx->search->query('');
 is(scalar(@$msgs), 1, 'only got one message');
-my $saved = $ibx->smsg_mime($msgs->[0]);
-is($saved->{mime}->as_string, $mime->as_string, 'injected message');
+my $eml = $ibx->smsg_eml($msgs->[0]);
+is($eml->as_string, $mime->as_string, 'injected message');
 
 {
 	my @new = glob("$faildir/new/*");

^ permalink raw reply related	[relevance 4%]

* [PATCH 00/13] smsg: remove tricky {mime} field
@ 2020-06-01 10:06  7% Eric Wong
  2020-06-01 10:06  4% ` [PATCH 08/13] www: remove smsg_mime API and adjust callers Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-06-01 10:06 UTC (permalink / raw)
  To: meta

Storing a large PublicInbox::Eml (or in the past, Email::MIME)
object inside a small PublicInbox::Smsg object has historically
been bloat-prone[1] since there may be many small smsgs in
memory at once

Hundreds or thousands of $smsg objects can linger in memory due
to search results and message threading operations.  So keep
$eml and $smsg objects independent of each other, for now.
Instead, we'll introduce a $smsg->populate($eml) API to handle
filling in the keys for the importer, indexer, and
non-SQLite-using WWW users.

Furthermore, $smsg->$field dispatch has always been measurably
faster than $smsg->{$field} access in NNTP.  Since $smsg->$field
became read-only with the removal of $smsg->{mime}, we can
abandon the $smsg->$field invocations and favor of direct hash
access.

[1] the prime example being what commit 7d02b9e64455831d fixed
    ("view: stop storing all MIME objects on large threads")

Eric Wong (13):
  inbox: introduce smsg_eml method
  wwwatomstream: convert callers to use smsg_eml
  v2writable: fix non-sensical interpolation in BUG message
  import: modernize to use Perl 5.10 features
  smsg: introduce ->populate method
  smsg: get rid of ->wrap initializer, too
  inbox: msg_by_*: remove $(size)ref args
  www: remove smsg_mime API and adjust callers
  nntp: smsg_range_i: favor ->{$field} lookups when possible
  smsg: get rid of remaining {mime} users
  smsg: remove ->bytes and ->lines methods
  smsg: remove remaining accessor methods
  wwwatomstream: drop smsg->{mid} fallback for non-SQLite

 Documentation/mknews.perl        |   7 +-
 lib/PublicInbox/ExtMsg.pm        |   2 +-
 lib/PublicInbox/Feed.pm          |   8 +-
 lib/PublicInbox/Import.pm        |  69 ++++++++---------
 lib/PublicInbox/Inbox.pm         |  32 ++++----
 lib/PublicInbox/Mbox.pm          |   2 +-
 lib/PublicInbox/NNTP.pm          |  14 +++-
 lib/PublicInbox/OverIdx.pm       |   3 +-
 lib/PublicInbox/SearchIdx.pm     |  33 ++++-----
 lib/PublicInbox/SearchView.pm    |   6 +-
 lib/PublicInbox/Smsg.pm          | 123 +++++++++++--------------------
 lib/PublicInbox/SolverGit.pm     |   4 +-
 lib/PublicInbox/V2Writable.pm    |  11 +--
 lib/PublicInbox/View.pm          |  63 ++++++++--------
 lib/PublicInbox/WwwAtomStream.pm |   8 +-
 t/altid.t                        |   3 +-
 t/altid_v2.t                     |   3 +-
 t/import.t                       |   3 +-
 t/search.t                       |  46 +++++++-----
 t/v2mda.t                        |   4 +-
 t/v2writable.t                   |   5 +-
 21 files changed, 207 insertions(+), 242 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-06-01 10:06  7% [PATCH 00/13] smsg: remove tricky {mime} field Eric Wong
2020-06-01 10:06  4% ` [PATCH 08/13] www: remove smsg_mime API and adjust callers Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).