user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 58/82] imap: compile UID FETCH to opcodes
  2020-06-10  7:03  7% [PATCH 00/82] public-inbox-imapd: read-only IMAP server Eric Wong
@ 2020-06-10  7:04  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-06-10  7:04 UTC (permalink / raw)
  To: meta

This is just a hair faster and cacheable in the future, if we
need it.  Most notably, this avoids doing PublicInbox::Eml->new
for simple "RFC822", "BODY[]", and "RFC822.SIZE" requests.
---
 lib/PublicInbox/IMAP.pm | 188 +++++++++++++++++++++++++---------------
 t/imap.t                |  16 ++++
 2 files changed, 134 insertions(+), 70 deletions(-)

diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index 8307343cf15..3fae81112aa 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -40,22 +40,26 @@ sub LINE_MAX () { 512 } # does RFC 3501 have a limit like RFC 977?
 # changing this will cause grief for clients which cache
 sub UID_BLOCK () { 50_000 }
 
-my %FETCH_NEED_BLOB = ( # for future optimization
-	'BODY[HEADER]' => 1,
-	'BODY[TEXT]' => 1,
-	'BODY[]' => 1,
-	'RFC822.HEADER' => 1,
-	'RFC822.SIZE' => 1, # needs CRLF conversion :<
-	'RFC822.TEXT' => 1,
-	BODY => 1,
-	BODYSTRUCTURE => 1,
-	ENVELOPE => 1,
-	FLAGS => 0,
-	INTERNALDATE => 0,
-	RFC822 => 1,
-	UID => 0,
+# these values are also used for sorting
+sub NEED_BLOB () { 1 }
+sub NEED_EML () { NEED_BLOB|2 }
+my $OP_EML_NEW = [ NEED_EML - 1, \&op_eml_new ];
+
+my %FETCH_NEED = ( # for future optimization
+	'BODY[HEADER]' => [ NEED_EML, \&emit_rfc822_header ],
+	'BODY[TEXT]' => [ NEED_EML, \&emit_rfc822_text ],
+	'BODY[]' => [ NEED_BLOB, \&emit_rfc822 ],
+	'RFC822.HEADER' => [ NEED_EML, \&emit_rfc822_header ],
+	'RFC822.TEXT' => [ NEED_EML, \&emit_rfc822_text ],
+	'RFC822.SIZE' => [ NEED_BLOB, \&emit_rfc822_size ],
+	RFC822 => [ NEED_BLOB, \&emit_rfc822 ],
+	BODY => [ NEED_EML, \&emit_body ],
+	BODYSTRUCTURE => [ NEED_EML, \&emit_bodystructure ],
+	ENVELOPE => [ NEED_EML, \&emit_envelope ],
+	FLAGS => [ 0, \&emit_flags ],
+	INTERNALDATE => [ 0, \&emit_internaldate ],
 );
-my %FETCH_ATT = map { $_ => [ $_ ] } keys %FETCH_NEED_BLOB;
+my %FETCH_ATT = map { $_ => [ $_ ] } keys %FETCH_NEED;
 
 # aliases (RFC 3501 section 6.4.5)
 $FETCH_ATT{FAST} = [ qw(FLAGS INTERNALDATE RFC822.SIZE) ];
@@ -63,9 +67,10 @@ $FETCH_ATT{ALL} = [ @{$FETCH_ATT{FAST}}, 'ENVELOPE' ];
 $FETCH_ATT{FULL} = [ @{$FETCH_ATT{ALL}}, 'BODY' ];
 
 for my $att (keys %FETCH_ATT) {
-	my %h = map { $_ => 1 } @{$FETCH_ATT{$att}};
+	my %h = map { $_ => $FETCH_NEED{$_} } @{$FETCH_ATT{$att}};
 	$FETCH_ATT{$att} = \%h;
 }
+undef %FETCH_NEED;
 
 my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*';
 $valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/;
@@ -417,7 +422,7 @@ sub requeue_once ($) {
 
 sub uid_fetch_cb { # called by git->cat_async via git_async_cat
 	my ($bref, $oid, $type, $size, $fetch_m_arg) = @_;
-	my ($self, undef, $msgs, undef, $want) = @$fetch_m_arg;
+	my ($self, undef, $msgs, undef, $ops, $partial) = @$fetch_m_arg;
 	my $smsg = shift @$msgs or die 'BUG: no smsg';
 	if (!defined($oid)) {
 		# it's possible to have TOCTOU if an admin runs
@@ -426,51 +431,72 @@ sub uid_fetch_cb { # called by git->cat_async via git_async_cat
 	} else {
 		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
 	}
-
 	$$bref =~ s/(?<!\r)\n/\r\n/sg; # make strict clients happy
 
 	# fixup old bug from import (pre-a0c07cba0e5d8b6a)
 	$$bref =~ s/\A[\r\n]*From [^\r\n]*\r\n//s;
-
 	$self->msg_more("* $smsg->{num} FETCH (UID $smsg->{num}");
-
-	$want->{'RFC822.SIZE'} and
-		$self->msg_more(' RFC822.SIZE '.length($$bref));
-	$want->{INTERNALDATE} and
-		$self->msg_more(' INTERNALDATE "'.$smsg->internaldate.'"');
-	$want->{FLAGS} and $self->msg_more(' FLAGS ()');
-	for ('RFC822', 'BODY[]') {
-		$want->{$_} or next;
-		$self->msg_more(" $_ {".length($$bref)."}\r\n");
-		$self->msg_more($$bref);
+	my $eml;
+	for (my $i = 0; $i < @$ops;) {
+		my $k = $ops->[$i++];
+		$ops->[$i++]->($self, $k, $smsg, $bref, $eml);
 	}
+	partial_emit($self, $partial, $eml) if $partial;
+	$self->msg_more(")\r\n");
+	requeue_once($self);
+}
 
-	my $eml = PublicInbox::Eml->new($bref);
+sub emit_rfc822 {
+	my ($self, $k, undef, $bref) = @_;
+	$self->msg_more(" $k {" . length($$bref)."}\r\n");
+	$self->msg_more($$bref);
+}
 
-	$want->{ENVELOPE} and
-		$self->msg_more(' ENVELOPE '.eml_envelope($eml));
+# Mail::IMAPClient::message_string cares about this by default
+# (->Ignoresizeerrors attribute)
+sub emit_rfc822_size {
+	my ($self, $k, undef, $bref) = @_;
+	$self->msg_more(' RFC822.SIZE ' . length($$bref));
+}
 
-	for ('RFC822.HEADER', 'BODY[HEADER]') {
-		$want->{$_} or next;
-		$self->msg_more(" $_ {".length(${$eml->{hdr}})."}\r\n");
-		$self->msg_more(${$eml->{hdr}});
-	}
-	for ('RFC822.TEXT', 'BODY[TEXT]') {
-		$want->{$_} or next;
-		$self->msg_more(" $_ {".length($$bref)."}\r\n");
-		$self->msg_more($$bref);
-	}
-	$want->{BODYSTRUCTURE} and
-		$self->msg_more(' BODYSTRUCTURE '.fetch_body($eml, 1));
-	$want->{BODY} and
-		$self->msg_more(' BODY '.fetch_body($eml));
-	if (my $partial = $want->{-partial}) {
-		partial_emit($self, $partial, $eml);
-	}
-	$self->msg_more(")\r\n");
-	requeue_once($self);
+sub emit_internaldate {
+	my ($self, undef, $smsg) = @_;
+	$self->msg_more(' INTERNALDATE "'.$smsg->internaldate.'"');
+}
+
+sub emit_flags { $_[0]->msg_more(' FLAGS ()') }
+
+sub emit_envelope {
+	my ($self, undef, undef, undef, $eml) = @_;
+	$self->msg_more(' ENVELOPE '.eml_envelope($eml));
+}
+
+sub emit_rfc822_header {
+	my ($self, $k, undef, undef, $eml) = @_;
+	$self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n");
+	$self->msg_more(${$eml->{hdr}});
+}
+
+# n.b. this is sorted to be after any op_eml_new ops
+sub emit_rfc822_text {
+	my ($self, $k, undef, $bref) = @_;
+	$self->msg_more(" $k {".length($$bref)."}\r\n");
+	$self->msg_more($$bref);
+}
+
+sub emit_bodystructure {
+	my ($self, undef, undef, undef, $eml) = @_;
+	$self->msg_more(' BODYSTRUCTURE '.fetch_body($eml, 1));
+}
+
+sub emit_body {
+	my ($self, undef, undef, undef, $eml) = @_;
+	$self->msg_more(' BODY '.fetch_body($eml));
 }
 
+# set $eml once ($_[4] == $eml, $_[3] == $bref)
+sub op_eml_new { $_[4] = PublicInbox::Eml->new($_[3]) }
+
 sub uid_clamp ($$$) {
 	my ($self, $beg, $end) = @_;
 	my $uid_min = $self->{uid_min} or return;
@@ -521,7 +547,7 @@ sub refill_range ($$$) {
 }
 
 sub uid_fetch_m { # long_response
-	my ($self, $tag, $msgs, $range_info, $want) = @_;
+	my ($self, $tag, $msgs, $range_info) = @_; # \@ops, \@partial
 	while (!@$msgs) { # rare
 		if (my $end = refill_range($self, $msgs, $range_info)) {
 			$self->write(\"$tag $end\r\n");
@@ -710,42 +736,64 @@ sub partial_emit ($$$) {
 	}
 }
 
-sub fetch_common ($$$$) {
-	my ($self, $tag, $range_csv, $want) = @_;
-	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
+sub fetch_compile ($) {
+	my ($want) = @_;
 	if ($want->[0] =~ s/\A\(//s) {
-		$want->[-1] =~ s/\)\z//s or return "$tag BAD no rparen\r\n";
+		$want->[-1] =~ s/\)\z//s or return 'BAD no rparen';
 	}
-	my (%partial, %want);
+	my (%partial, %seen, @op);
+	my $need = 0;
 	while (defined(my $att = shift @$want)) {
 		$att = uc($att);
+		next if $att eq 'UID'; # always returned
 		$att =~ s/\ABODY\.PEEK\[/BODY\[/; # we're read-only
 		my $x = $FETCH_ATT{$att};
 		if ($x) {
-			%want = (%want, %$x);
+			while (my ($k, $fl_cb) = each %$x) {
+				next if $seen{$k}++;
+				$need |= $fl_cb->[0];
+
+				# insert a special op to convert $bref to $eml
+				# the first time we need it
+				if ($need == NEED_EML && !$seen{$need}++) {
+					push @op, $OP_EML_NEW;
+				}
+				# $fl_cb = [ flags, \&emit_foo ]
+				push @op, [ @$fl_cb , $k ];
+			}
 		} elsif (!partial_prepare(\%partial, $want, $att)) {
-			return "$tag BAD param: $att\r\n";
+			return "BAD param: $att";
 		}
 	}
+	my @r;
 
 	# stabilize partial order for consistency and ease-of-debugging:
 	if (scalar keys %partial) {
-		$want{-partial} = [ map {;
-			[ $_, @{$partial{$_}} ]
-		} sort keys %partial ];
+		$need = NEED_EML;
+		push @op, $OP_EML_NEW if !$seen{$need}++;
+		$r[2] = [ map { [ $_, @{$partial{$_}} ] } sort keys %partial ];
 	}
-	$range_csv = 'bad' if $range_csv !~ $valid_range;
-	my $range_info = range_step($self, \$range_csv);
-	return "$tag $range_info\r\n" if !ref($range_info);
-	[ $tag, [], $range_info, \%want ];
+
+	$r[0] = $need;
+
+	# r[1] = [ $key1, $cb1, $key2, $cb2, ... ]
+	use sort 'stable'; # makes output more consistent
+	$r[1] = [ map { ($_->[2], $_->[1]) } sort { $a->[0] <=> $b->[0] } @op ];
+	@r;
 }
 
 sub cmd_uid_fetch ($$$;@) {
 	my ($self, $tag, $range_csv, @want) = @_;
-	my $args = fetch_common($self, $tag, $range_csv, \@want);
-	ref($args) eq 'ARRAY' ?
-		long_response($self, \&uid_fetch_m, @$args) :
-		$args; # error
+	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
+	my ($need, $ops, $partial) = fetch_compile(\@want);
+	return "$tag $need\r\n" unless $ops;
+
+	$range_csv = 'bad' if $range_csv !~ $valid_range;
+	my $range_info = range_step($self, \$range_csv);
+	return "$tag $range_info\r\n" if !ref($range_info);
+
+	long_response($self, \&uid_fetch_m,
+			$tag, [], $range_info, $ops, $partial);
 }
 
 sub parse_date ($) { # 02-Oct-1993
diff --git a/t/imap.t b/t/imap.t
index 47e86ef42c7..2401237c8a0 100644
--- a/t/imap.t
+++ b/t/imap.t
@@ -107,4 +107,20 @@ EOF
 	}, 'structure matches expected');
 }
 
+{
+	my $fetch_compile = \&PublicInbox::IMAP::fetch_compile;
+	my ($cb, $ops, $partial) = $fetch_compile->(['BODY[]']);
+	is($partial, undef, 'no partial fetch data');
+	is_deeply($ops,
+		[ 'BODY[]', \&PublicInbox::IMAP::emit_rfc822 ],
+		'proper key and op compiled for BODY[]');
+
+	($cb, $ops, $partial) = $fetch_compile->(['BODY', 'BODY[]']);
+	is_deeply($ops, [
+		'BODY[]', \&PublicInbox::IMAP::emit_rfc822,
+		undef, \&PublicInbox::IMAP::op_eml_new,
+		'BODY', \&PublicInbox::IMAP::emit_body,
+	], 'placed op_eml_new before emit_body');
+}
+
 done_testing;

^ permalink raw reply related	[relevance 4%]

* [PATCH 00/82] public-inbox-imapd: read-only IMAP server
@ 2020-06-10  7:03  7% Eric Wong
  2020-06-10  7:04  4% ` [PATCH 58/82] imap: compile UID FETCH to opcodes Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-06-10  7:03 UTC (permalink / raw)
  To: meta

So I finally wrote my first IMAP server!  And I'm actually
fairly satisfied with how it's turning out to support a bunch
of other performance + scalability work I've wanted to do.

Some previous notes here:
  https://public-inbox.org/meta/20200609113442.GA16856@dcvr/

I finally seem to have gotten it to play nicely with mutt header
caching, so it's fit for public consumption :)

imaps://news.public-inbox.org/INBOX.comp.mail.public-inbox.meta.0

You can use any username+password, and AUTH=ANONYMOUS also
works if your client does that.

It doesn't support UTF-7 (mailbox names) or advertise UTF-8
in CAPABILITIES, yet; I still have RFCs to read :P

And there's a bunch of new things which could use some
testing from non-mutt/mbsync/offlineimap users.
Maybe you'll find some client-side bugs like I did :P

v1 reindexing also gets a little bit of parallelism :)

Anyways, I'll probably be porting some of the scalability
and slow-storage work to older parts of the code before
fiddling with more IMAP extensions.

Eric Wong (82):
  doc: add some IMAP standards
  nntpd: restrict allowed newsgroup names
  preliminary imap server implementation
  inboxidle: new class to detect inbox changes
  imap: support IDLE
  msgmap: split ->max into its own method
  imap: delay InboxIdle start, support refresh
  imap: implement STATUS command
  imap: use Text::ParseWords::parse_line to handle quoted words
  imap: support LIST command
  t/imapd: support FakeInotify and KQNotify
  imap: support fetch for BODYSTRUCTURE and BODY
  eml: each_part: single part $idx is 1
  imap: allow fetch of partial of BODY[...] and headers
  imap: always include `resp-text' in responses
  imap: split out unit tests and benchmarks
  imap: fix multi-message partial header fetches
  imap: simplify partial fetch structure
  imap: support sequence number FETCH
  imap: do not include ".PEEK" in responses
  imap: support the CLOSE command
  imap: speed up HEADER.FIELDS[.NOT] range fetches
  git: async: flatten the inflight array
  git: do our own read buffering for cat-file
  imap: use git-cat-file asynchronously
  git: idle rbuf for async
  imap: support LSUB command
  imap: FETCH: support comma-delimited ranges
  add imapd compression test
  testcommon: tcp_(server|connect): BAIL_OUT on failure
  *deflate: drop invalid comment about rbuf
  imap: fix pipelining with async git
  git: cat_async: provide requested OID + "missing" on missing blobs
  git: move async_cat reference to PublicInbox::Git
  git: async: automatic retry on alternates change
  imapclient: wrapper for Mail::IMAPClient
  xt: add imapd-validate and imapd-mbsync-oimap
  imap: support out-of-bounds ranges
  xt/perf-imap-list: time refresh_inboxlist
  imap: case-insensitive mailbox name comparisons
  imap: break giant inboxes into sub-inboxes of 50K messages
  imap: start introducing iterative config reloading
  imap: require ".$UID_MIN-$UID_END" suffix
  imapd: ensure LIST is sorted alphabetically, for now
  imap: omit $UID_END from mailbox name, use index
  t/config.t: always compare against git bool behavior
  xt/*: show some tunable parameters
  imap: STATUS and LIST are case-insensitive, too
  imap: EXAMINE/STATUS: return correct counts
  imap: avoid uninitialized warnings on incomplete commands
  imap: start parsing out queries for SQLite and Xapian
  imap: SEARCH: clamp results to the 50K UID range
  imap: allow UID range search on timestamps
  over: get_art: use dbh->prepare_cached
  search: index byte size of a message for IMAP search
  search: index UID for IMAP search, too
  imap: remove dummies from sequence number FETCH
  imap: compile UID FETCH to opcodes
  imap: UID FETCH: optimize for smsg-only case
  imap: UID FETCH: optimize (UID FLAGS) harder
  imap: IDLE: avoid extraneous wakeups, keep-alive
  imap: 30 minute auto-logout timer
  imap: split ->logged_in attribute into a separate class
  searchidx: v1 (re)-index uses git asynchronously
  index: account for CRLF conversion when storing bytes
  imap: rely on smsg->{bytes} for RFC822.SIZE
  imap: UID FETCH requires at least one data item
  imap: LIST shows "INBOX" in all caps
  imap: support 8000 octet lines
  imap: reinstate some message sequence number support
  imap: cleanup ->{uid_base} usage
  imap: FETCH: more granular CRLF conversion
  imap: further speed up HEADER.FIELDS FETCH requests
  imap: FETCH: try to make fake MSNs sequentially
  imap: STATUS/EXAMINE: rely on SQLite overview
  imap: UID SEARCH: support multiple ranges
  imap: wire up Xapian search, msn SEARCH and multiple ranges
  imap: misc cleanups and notes
  imapd: don't bother sorting LIST output
  imap: drop non-UID SEARCH for now
  over: uid_range: remove LIMIT
  imap: FETCH: proper MSN => UID mapping for requests

 Documentation/public-inbox-imapd.pod |   91 ++
 Documentation/standards.perl         |   10 +
 MANIFEST                             |   18 +
 lib/PublicInbox/Config.pm            |   18 +
 lib/PublicInbox/Daemon.pm            |   24 +-
 lib/PublicInbox/DummyInbox.pm        |   22 +
 lib/PublicInbox/Eml.pm               |    9 +-
 lib/PublicInbox/FakeInotify.pm       |   59 ++
 lib/PublicInbox/Git.pm               |  163 +--
 lib/PublicInbox/GitAsyncCat.pm       |   51 +
 lib/PublicInbox/IMAP.pm              | 1397 ++++++++++++++++++++++++++
 lib/PublicInbox/IMAPClient.pm        |  119 +++
 lib/PublicInbox/IMAPD.pm             |  114 +++
 lib/PublicInbox/IMAPdeflate.pm       |  126 +++
 lib/PublicInbox/Import.pm            |    2 +-
 lib/PublicInbox/In2Tie.pm            |   17 +
 lib/PublicInbox/Inbox.pm             |   33 +-
 lib/PublicInbox/InboxIdle.pm         |   79 ++
 lib/PublicInbox/KQNotify.pm          |   66 ++
 lib/PublicInbox/Lock.pm              |    7 +
 lib/PublicInbox/MsgIter.pm           |    2 +-
 lib/PublicInbox/Msgmap.pm            |   20 +-
 lib/PublicInbox/NNTPD.pm             |   12 +-
 lib/PublicInbox/NNTPdeflate.pm       |    1 -
 lib/PublicInbox/Over.pm              |   50 +-
 lib/PublicInbox/Search.pm            |   32 +-
 lib/PublicInbox/SearchIdx.pm         |   89 +-
 lib/PublicInbox/SearchIdxShard.pm    |   11 +-
 lib/PublicInbox/Smsg.pm              |    8 +-
 lib/PublicInbox/TestCommon.pm        |    7 +-
 lib/PublicInbox/V2Writable.pm        |   10 +-
 script/public-inbox-imapd            |   14 +
 t/config.t                           |   15 +-
 t/eml.t                              |    2 +-
 t/git.t                              |   40 +-
 t/imap.t                             |  133 +++
 t/imapd-tls.t                        |  204 ++++
 t/imapd.t                            |  398 ++++++++
 t/import.t                           |    5 +-
 t/inbox_idle.t                       |   72 ++
 t/nntpd.t                            |    5 +-
 t/over.t                             |    3 +
 t/search.t                           |   19 +
 xt/cmp-msgstr.t                      |    1 -
 xt/cmp-msgview.t                     |    1 -
 xt/eml_check_limits.t                |    6 +-
 xt/git_async_cmp.t                   |    2 +-
 xt/imapd-mbsync-oimap.t              |  132 +++
 xt/imapd-validate.t                  |  177 ++++
 xt/mem-msgview.t                     |    1 +
 xt/msgtime_cmp.t                     |    1 -
 xt/perf-msgview.t                    |    1 -
 52 files changed, 3718 insertions(+), 181 deletions(-)
 create mode 100644 Documentation/public-inbox-imapd.pod
 create mode 100644 lib/PublicInbox/DummyInbox.pm
 create mode 100644 lib/PublicInbox/FakeInotify.pm
 create mode 100644 lib/PublicInbox/GitAsyncCat.pm
 create mode 100644 lib/PublicInbox/IMAP.pm
 create mode 100644 lib/PublicInbox/IMAPClient.pm
 create mode 100644 lib/PublicInbox/IMAPD.pm
 create mode 100644 lib/PublicInbox/IMAPdeflate.pm
 create mode 100644 lib/PublicInbox/In2Tie.pm
 create mode 100644 lib/PublicInbox/InboxIdle.pm
 create mode 100644 lib/PublicInbox/KQNotify.pm
 create mode 100644 script/public-inbox-imapd
 create mode 100644 t/imap.t
 create mode 100644 t/imapd-tls.t
 create mode 100644 t/imapd.t
 create mode 100644 t/inbox_idle.t
 create mode 100644 xt/imapd-mbsync-oimap.t
 create mode 100644 xt/imapd-validate.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-06-10  7:03  7% [PATCH 00/82] public-inbox-imapd: read-only IMAP server Eric Wong
2020-06-10  7:04  4% ` [PATCH 58/82] imap: compile UID FETCH to opcodes Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).