user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 12/13] smsg: remove remaining accessor methods
  2020-06-01 10:06  7% [PATCH 00/13] smsg: remove tricky {mime} field Eric Wong
@ 2020-06-01 10:06  3% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-06-01 10:06 UTC (permalink / raw)
  To: meta

We'll continue to favor simpler data models that can be
used directly rather than wasting time and memory with
accessor APIs.

The ->from, ->to, ->cc, ->mid, ->subject, ->references methods can
all be trivially replaced by hash lookups since all their values
are stored in doc_data.  Most remaining callers of those methods
were test cases, anyways.

->from_name is only used in the PSGI code, so we can just
use ->psgi_cull to take care of populating the {from_name}
field.
---
 lib/PublicInbox/ExtMsg.pm  |  2 +-
 lib/PublicInbox/Mbox.pm    |  2 +-
 lib/PublicInbox/NNTP.pm    |  2 +-
 lib/PublicInbox/OverIdx.pm |  2 +-
 lib/PublicInbox/Smsg.pm    | 45 ++++++-------------------------------
 t/altid.t                  |  3 ++-
 t/altid_v2.t               |  3 ++-
 t/search.t                 | 46 +++++++++++++++++++++-----------------
 t/v2writable.t             |  5 +++--
 9 files changed, 44 insertions(+), 66 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 1d17c2ce673..d7917b34fb4 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -30,7 +30,7 @@ our @EXT_URL = map { ascii_html($_) } (
 sub PARTIAL_MAX () { 100 }
 
 sub mids_from_mset { # Search::retry_reopen callback
-	[ map { PublicInbox::Smsg::from_mitem($_)->mid } $_[0]->items ];
+	[ map { PublicInbox::Smsg::from_mitem($_)->{mid} } $_[0]->items ];
 }
 
 sub search_partial ($$) {
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 94e61d4d2ff..b46dacfdc88 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -151,7 +151,7 @@ sub thread_mbox {
 	return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
 	$ctx->{prev} = $msgs->[-1];
 	$ctx->{over} = $over; # bump refcnt
-	PublicInbox::MboxGz->response($ctx, \&thread_cb, $msgs->[0]->subject);
+	PublicInbox::MboxGz->response($ctx, \&thread_cb, $msgs->[0]->{subject});
 }
 
 sub emit_range {
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index a37910d1739..ac13c7df8ce 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -296,7 +296,7 @@ sub newnews_i {
 	my $msgs = $over->query_ts($ts, $$prev);
 	if (scalar @$msgs) {
 		more($self, '<' .
-			join(">\r\n<", map { $_->mid } @$msgs ).
+			join(">\r\n<", map { $_->{mid} } @$msgs ).
 			'>');
 		$$prev = $msgs->[-1]->{num};
 	} else {
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index a078f80451f..c7f45a6c910 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -257,7 +257,7 @@ sub add_overview {
 	my $hdr = $mime->header_obj;
 	my $mids = mids_for_index($hdr);
 	my $refs = parse_references($smsg, $hdr, $mids);
-	my $subj = $smsg->subject;
+	my $subj = $smsg->{subject};
 	my $xpath;
 	if ($subj ne '') {
 		$xpath = subject_path($subj);
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index a7ee2e40939..e8f9c9a3681 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -28,7 +28,7 @@ sub to_doc_data {
 	join("\n",
 		$self->{subject},
 		$self->{from},
-		$self->references,
+		$self->{references} // '',
 		$self->{to},
 		$self->{cc},
 		$self->{blob},
@@ -74,11 +74,15 @@ sub load_expand {
 
 sub psgi_cull ($) {
 	my ($self) = @_;
-	from_name($self); # fill in {from_name} so we can delete {from}
+
+	# ghosts don't have ->{from}
+	my $from = delete($self->{from}) // '';
+	my @n = PublicInbox::Address::names($from);
+	$self->{from_name} = join(', ', @n);
 
 	# drop NNTP-only fields which aren't relevant to PSGI results:
 	# saves ~80K on a 200 item search result:
-	delete @$self{qw(from ts to cc bytes lines)};
+	delete @$self{qw(ts to cc bytes lines)};
 	$self;
 }
 
@@ -90,11 +94,6 @@ sub from_mitem {
 	psgi_cull(load_expand($self, $mitem->get_document));
 }
 
-sub __hdr ($$) {
-	my ($self, $field) = @_;
-	$self->{lc($field)};
-}
-
 # for Import and v1 non-SQLite WWW code paths
 sub populate {
 	my ($self, $hdr, $v2w) = @_;
@@ -128,10 +127,6 @@ sub populate {
 	$self->{mid} //= eval { mids($hdr)->[0] } // '';
 }
 
-sub subject ($) { __hdr($_[0], 'Subject') }
-sub to ($) { __hdr($_[0], 'To') }
-sub cc ($) { __hdr($_[0], 'Cc') }
-
 # no strftime, that is locale-dependent and not for RFC822
 my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
 my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
@@ -146,32 +141,6 @@ sub date ($) {
 
 }
 
-sub from ($) {
-	my ($self) = @_;
-	my $from = __hdr($self, 'From');
-	if (defined $from && !defined $self->{from_name}) {
-		my @n = PublicInbox::Address::names($from);
-		$self->{from_name} = join(', ', @n);
-	}
-	$from;
-}
-
-sub from_name {
-	my ($self) = @_;
-	my $from_name = $self->{from_name};
-	return $from_name if defined $from_name;
-	$self->from;
-	$self->{from_name};
-}
-
-sub references {
-	my ($self) = @_;
-	my $x = $self->{references};
-	defined $x ? $x : '';
-}
-
-sub mid { $_[0]->{mid} }
-
 our $REPLY_RE = qr/^re:\s+/i;
 
 sub subject_normalized ($) {
diff --git a/t/altid.t b/t/altid.t
index 670a3963375..f3c01520c6d 100644
--- a/t/altid.t
+++ b/t/altid.t
@@ -47,7 +47,8 @@ EOF
 {
 	my $ro = PublicInbox::Search->new($ibx);
 	my $msgs = $ro->query("gmane:1234");
-	is_deeply([map { $_->mid } @$msgs], ['a@example.com'], 'got one match');
+	$msgs = [ map { $_->{mid} } @$msgs ];
+	is_deeply($msgs, ['a@example.com'], 'got one match');
 
 	$msgs = $ro->query("gmane:666");
 	is_deeply([], $msgs, 'body did NOT match');
diff --git a/t/altid_v2.t b/t/altid_v2.t
index 28a047d992e..01ed9ed48db 100644
--- a/t/altid_v2.t
+++ b/t/altid_v2.t
@@ -42,7 +42,8 @@ EOF
 $v2w->done;
 
 my $msgs = $ibx->search->reopen->query("gmane:1234");
-is_deeply([map { $_->mid } @$msgs], ['a@example.com'], 'got one match');
+$msgs = [ map { $_->{mid} } @$msgs ];
+is_deeply($msgs, ['a@example.com'], 'got one match');
 $msgs = $ibx->search->query("gmane:666");
 is_deeply([], $msgs, 'body did NOT match');
 
diff --git a/t/search.t b/t/search.t
index 9d74f5e0532..6cf2bc2d6b4 100644
--- a/t/search.t
+++ b/t/search.t
@@ -92,7 +92,7 @@ EOF
 
 sub filter_mids {
 	my ($msgs) = @_;
-	sort(map { $_->mid } @$msgs);
+	sort(map { $_->{mid} } @$msgs);
 }
 
 {
@@ -100,7 +100,7 @@ sub filter_mids {
 	$ro->reopen;
 	my $found = $ro->query('m:root@s');
 	is(scalar(@$found), 1, "message found");
-	is($found->[0]->mid, 'root@s', 'mid set correctly') if scalar(@$found);
+	is($found->[0]->{mid}, 'root@s', 'mid set correctly') if @$found;
 
 	my ($res, @res);
 	my @exp = sort qw(root@s last@s);
@@ -176,7 +176,7 @@ EOF
 	# body
 	$res = $ro->query('goodbye');
 	is(scalar(@$res), 1, "goodbye message found");
-	is($res->[0]->mid, 'last@s', 'got goodbye message body') if scalar(@$res);
+	is($res->[0]->{mid}, 'last@s', 'got goodbye message body') if @$res;
 
 	# datestamp
 	$res = $ro->query('dt:20101002000001..20101002000001');
@@ -257,12 +257,13 @@ fade
 EOF
 	my $res = $rw->query("theatre");
 	is(scalar(@$res), 2, "got both matches");
-	is($res->[0]->mid, 'nquote@a', "non-quoted scores higher") if scalar(@$res);
-	is($res->[1]->mid, 'quote@a', "quoted result still returned") if scalar(@$res);
-
+	if (@$res == 2) {
+		is($res->[0]->{mid}, 'nquote@a', 'non-quoted scores higher');
+		is($res->[1]->{mid}, 'quote@a', 'quoted result still returned');
+	}
 	$res = $rw->query("illusions");
 	is(scalar(@$res), 1, "got a match for quoted text");
-	is($res->[0]->mid, 'quote@a',
+	is($res->[0]->{mid}, 'quote@a',
 		"quoted result returned if nothing else") if scalar(@$res);
 });
 
@@ -283,8 +284,10 @@ EOF
 	ok($doc_id > 0, "doc_id defined with circular reference");
 	my $smsg = $rw->query('m:circle@a', {limit=>1})->[0];
 	is(defined($smsg), 1, 'found m:circl@a');
-	is($smsg->references, '', "no references created") if defined($smsg);
-	is($smsg->subject, $s, 'long subject not rewritten') if defined($smsg);
+	if (defined $smsg) {
+		is($smsg->{references}, '', "no references created");
+		is($smsg->{subject}, $s, 'long subject not rewritten');
+	}
 });
 
 $ibx->with_umask(sub {
@@ -293,7 +296,10 @@ $ibx->with_umask(sub {
 	ok($doc_id > 0, 'message indexed doc_id with UTF-8');
 	my $msg = $rw->query('m:testmessage@example.com', {limit => 1})->[0];
 	is(defined($msg), 1, 'found testmessage@example.com');
-	is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved') if defined($msg);
+	if (defined $msg) {
+		is($mime->header('Subject'), $msg->{subject},
+			'UTF-8 subject preserved');
+	}
 });
 
 {
@@ -311,14 +317,14 @@ $ibx->with_umask(sub {
 	is($mset->size, 6, 'searched To: successfully');
 	foreach my $m ($mset->items) {
 		my $smsg = $ro->{over_ro}->get_art($m->get_docid);
-		like($smsg->to, qr/\blist\@example\.com\b/, 'to appears');
+		like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears');
 	}
 
 	$mset = $ro->query('tc:list@example.com', {mset => 1});
 	is($mset->size, 6, 'searched To+Cc: successfully');
 	foreach my $m ($mset->items) {
 		my $smsg = $ro->{over_ro}->get_art($m->get_docid);
-		my $tocc = join("\n", $smsg->to, $smsg->cc);
+		my $tocc = join("\n", $smsg->{to}, $smsg->{cc});
 		like($tocc, qr/\blist\@example\.com\b/, 'tocc appears');
 	}
 
@@ -327,7 +333,7 @@ $ibx->with_umask(sub {
 		is($mset->items, 1, "searched $pfx successfully for Cc:");
 		foreach my $m ($mset->items) {
 			my $smsg = $ro->{over_ro}->get_art($m->get_docid);
-			like($smsg->cc, qr/\bfoo\@example\.com\b/,
+			like($smsg->{cc}, qr/\bfoo\@example\.com\b/,
 				'cc appears');
 		}
 	}
@@ -337,7 +343,7 @@ $ibx->with_umask(sub {
 		is(scalar(@$res), 1,
 			"searched $pfx successfully for From:");
 		foreach my $smsg (@$res) {
-			like($smsg->from_name, qr/Laggy Sender/,
+			like($smsg->{from_name}, qr/Laggy Sender/,
 				"From appears with $pfx");
 		}
 	}
@@ -354,18 +360,18 @@ $ibx->with_umask(sub {
 
 	$res = $ro->query('q:theatre');
 	is(scalar(@$res), 1, 'only one quoted body');
-	like($res->[0]->from_name, qr/\AQuoter/,
+	like($res->[0]->{from_name}, qr/\AQuoter/,
 		'got quoted body') if (scalar(@$res));
 
 	$res = $ro->query('nq:theatre');
 	is(scalar @$res, 1, 'only one non-quoted body');
-	like($res->[0]->from_name, qr/\ANon-Quoter/,
+	like($res->[0]->{from_name}, qr/\ANon-Quoter/,
 		'got non-quoted body') if (scalar(@$res));
 
 	foreach my $pfx (qw(b: bs:)) {
 		$res = $ro->query($pfx . 'theatre');
 		is(scalar @$res, 2, "searched both bodies for $pfx");
-		like($res->[0]->from_name, qr/\ANon-Quoter/,
+		like($res->[0]->{from_name}, qr/\ANon-Quoter/,
 			"non-quoter first for $pfx") if scalar(@$res);
 	}
 }
@@ -379,16 +385,16 @@ $ibx->with_umask(sub {
 	is(scalar @$n, 1, 'got result for n:');
 	my $res = $ro->query('part_deux.txt');
 	is(scalar @$res, 1, 'got result without n:');
-	is($n->[0]->mid, $res->[0]->mid,
+	is($n->[0]->{mid}, $res->[0]->{mid},
 		'same result with and without') if scalar(@$res);
 	my $txt = $ro->query('"inside another"');
 	is(scalar @$txt, 1, 'found inside another');
-	is($txt->[0]->mid, $res->[0]->mid,
+	is($txt->[0]->{mid}, $res->[0]->{mid},
 		'search inside text attachments works') if scalar(@$txt);
 
 	my $art;
 	if (scalar(@$n) >= 1) {
-		my $mid = $n->[0]->mid;
+		my $mid = $n->[0]->{mid};
 		my ($id, $prev);
 		$art = $ro->{over_ro}->next_by_mid($mid, \$id, \$prev);
 		ok($art, 'article exists in OVER DB');
diff --git a/t/v2writable.t b/t/v2writable.t
index fa5c786e151..2bd7a400978 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -224,12 +224,13 @@ EOF
 	like($tip, qr/\A[a-f0-9]+ test removal\n\z/s,
 		'commit message propagated to git');
 	is_deeply(\@after, \@before, 'only one commit written to git');
-	is($ibx->mm->num_for($smsg->mid), undef, 'no longer in Msgmap by mid');
+	my $mid = $smsg->{mid};
+	is($ibx->mm->num_for($mid), undef, 'no longer in Msgmap by mid');
 	my $num = $smsg->{num};
 	like($num, qr/\A\d+\z/, 'numeric number in return message');
 	is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num');
 	my $srch = $ibx->search->reopen;
-	my $mset = $srch->query('m:'.$smsg->mid, { mset => 1});
+	my $mset = $srch->query('m:'.$mid, { mset => 1});
 	is($mset->size, 0, 'no longer found in Xapian');
 	my @log1 = (@log, qw(-1 --pretty=raw --raw -r --no-renames));
 	is($srch->{over_ro}->get_art($num), undef,

^ permalink raw reply	[relevance 3%]

* [PATCH 00/13] smsg: remove tricky {mime} field
@ 2020-06-01 10:06  7% Eric Wong
  2020-06-01 10:06  3% ` [PATCH 12/13] smsg: remove remaining accessor methods Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-06-01 10:06 UTC (permalink / raw)
  To: meta

Storing a large PublicInbox::Eml (or in the past, Email::MIME)
object inside a small PublicInbox::Smsg object has historically
been bloat-prone[1] since there may be many small smsgs in
memory at once.

Hundreds or thousands of $smsg objects can linger in memory due
to search results and message threading operations.  So keep
$eml and $smsg objects independent of each other, for now.
Instead, we'll introduce a $smsg->populate($eml) API to handle
filling in the keys for the importer, indexer, and
non-SQLite-using WWW users.

Furthermore, $smsg->$field dispatch has always been measurably
slower than $smsg->{$field} access in NNTP.  Since $smsg->$field
became read-only with the removal of $smsg->{mime}, we can
abandon the $smsg->$field invocations in favor of direct hash
access.

[1] the prime example being what commit 7d02b9e64455831d fixed
    ("view: stop storing all MIME objects on large threads")

Eric Wong (13):
  inbox: introduce smsg_eml method
  wwwatomstream: convert callers to use smsg_eml
  v2writable: fix non-sensical interpolation in BUG message
  import: modernize to use Perl 5.10 features
  smsg: introduce ->populate method
  smsg: get rid of ->wrap initializer, too
  inbox: msg_by_*: remove $(size)ref args
  www: remove smsg_mime API and adjust callers
  nntp: smsg_range_i: favor ->{$field} lookups when possible
  smsg: get rid of remaining {mime} users
  smsg: remove ->bytes and ->lines methods
  smsg: remove remaining accessor methods
  wwwatomstream: drop smsg->{mid} fallback for non-SQLite

 Documentation/mknews.perl        |   7 +-
 lib/PublicInbox/ExtMsg.pm        |   2 +-
 lib/PublicInbox/Feed.pm          |   8 +-
 lib/PublicInbox/Import.pm        |  69 ++++++++---------
 lib/PublicInbox/Inbox.pm         |  32 ++++----
 lib/PublicInbox/Mbox.pm          |   2 +-
 lib/PublicInbox/NNTP.pm          |  14 +++-
 lib/PublicInbox/OverIdx.pm       |   3 +-
 lib/PublicInbox/SearchIdx.pm     |  33 ++++-----
 lib/PublicInbox/SearchView.pm    |   6 +-
 lib/PublicInbox/Smsg.pm          | 123 +++++++++++--------------------
 lib/PublicInbox/SolverGit.pm     |   4 +-
 lib/PublicInbox/V2Writable.pm    |  11 +--
 lib/PublicInbox/View.pm          |  63 ++++++++--------
 lib/PublicInbox/WwwAtomStream.pm |   8 +-
 t/altid.t                        |   3 +-
 t/altid_v2.t                     |   3 +-
 t/import.t                       |   3 +-
 t/search.t                       |  46 +++++++-----
 t/v2mda.t                        |   4 +-
 t/v2writable.t                   |   5 +-
 21 files changed, 207 insertions(+), 242 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | sort options + mbox downloads above
-- links below jump to the message on this page --
2020-06-01 10:06  7% [PATCH 00/13] smsg: remove tricky {mime} field Eric Wong
2020-06-01 10:06  3% ` [PATCH 12/13] smsg: remove remaining accessor methods Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).