user/dev discussion of public-inbox itself
 help / color / Atom feed
* [PATCH 0/2] imap: search improvements
@ 2020-06-16  5:05 Eric Wong
  2020-06-16  5:05 ` [PATCH 1/2] imap: reinstate non-UID SEARCH Eric Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Eric Wong @ 2020-06-16  5:05 UTC (permalink / raw)
  To: meta

Non-UID SEARCH could be useful to someone, somewhere,
now that we have the memory-efficient uo2m mapping to
satisfy mutt users.

I also decided to replace the janky IMAP search parser
with a more proper one based on Parse::RecDescent:

	https://public-inbox.org/meta/20200615062114.GA19510@dcvr/

Learning P::RD was fairly straightforward for an old dog like me
and I may use it more in the future for this and other projects.

Eric Wong (2):
  imap: reinstate non-UID SEARCH
  imap: *SEARCH: use Parse::RecDescent

 MANIFEST                        |   2 +
 lib/PublicInbox/IMAP.pm         | 132 +++++----------
 lib/PublicInbox/IMAPsearchqp.pm | 276 ++++++++++++++++++++++++++++++++
 t/imap.t                        |  18 +--
 t/imap_searchqp.t               | 105 ++++++++++++
 t/imapd-tls.t                   |   2 +-
 t/imapd.t                       |  48 +++++-
 xt/mem-imapd-tls.t              |   3 +-
 8 files changed, 474 insertions(+), 112 deletions(-)
 create mode 100644 lib/PublicInbox/IMAPsearchqp.pm
 create mode 100644 t/imap_searchqp.t

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] imap: reinstate non-UID SEARCH
  2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
@ 2020-06-16  5:05 ` Eric Wong
  2020-06-16  5:05 ` [PATCH 2/2] imap: *SEARCH: use Parse::RecDescent Eric Wong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-06-16  5:05 UTC (permalink / raw)
  To: meta

Now that we support MSNs properly, it seems acceptable
to support regular SEARCH requests in case there are any
clients which still use non-UID SEARCH.
---
 lib/PublicInbox/IMAP.pm | 35 ++++++++++++++++++++++++++---------
 t/imapd.t               | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index 373bffc18d9..4631ea7eabc 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1086,14 +1086,23 @@ sub parse_date ($) { # 02-Oct-1993
 	timegm(0, 0, 0, $dd, $mm, $yyyy);
 }
 
+sub msn_convert ($$) {
+	my ($self, $uids) = @_;
+	my $adj = $self->{uid_base} + 1;
+	my $uo2m = uo2m_extend($self, $uids->[-1]);
+	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
+	$_ = $uo2m->[$_ - $adj] for @$uids;
+}
+
 sub search_uid_range { # long_response
-	my ($self, $tag, $sql, $range_info) = @_;
+	my ($self, $tag, $sql, $range_info, $want_msn) = @_;
 	my $uids = [];
 	if (defined(my $err = refill_uids($self, $uids, $range_info, $sql))) {
 		$err ||= 'OK Search done';
 		$self->write("\r\n$tag $err\r\n");
 		return;
 	}
+	msn_convert($self, $uids) if $want_msn;
 	$self->msg_more(join(' ', '', @$uids));
 	1; # more
 }
@@ -1256,38 +1265,46 @@ sub refill_xap ($$$$) {
 }
 
 sub search_xap_range { # long_response
-	my ($self, $tag, $q, $range_info) = @_;
+	my ($self, $tag, $q, $range_info, $want_msn) = @_;
 	my $uids = [];
 	if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
 		$err ||= 'OK Search done';
 		$self->write("\r\n$tag $err\r\n");
 		return;
 	}
+	msn_convert($self, $uids) if $want_msn;
 	$self->msg_more(join(' ', '', @$uids));
 	1; # more
 }
 
-sub cmd_uid_search ($$$;) {
-	my ($self, $tag) = splice(@_, 0, 2);
+sub search_common {
+	my ($self, $tag, $rest, $want_msn) = @_;
 	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
-	my $q = parse_query($self, \@_);
+	my $q = parse_query($self, $rest);
 	return "$tag $q\r\n" if !ref($q);
 	my ($sql, $range_info) = delete @$q{qw(sql range_info)};
 	if (!scalar(keys %$q)) { # overview.sqlite3
 		$self->msg_more('* SEARCH');
 		long_response($self, \&search_uid_range,
-				$tag, $sql, $range_info);
+				$tag, $sql, $range_info, $want_msn);
 	} elsif ($q = $q->{xap}) {
 		$self->msg_more('* SEARCH');
 		long_response($self, \&search_xap_range,
-				$tag, $q, $range_info);
+				$tag, $q, $range_info, $want_msn);
 	} else {
 		"$tag BAD Error\r\n";
 	}
 }
 
-# note: MSN SEARCH is NOT supported.  Do any widely-used MUAs
-# rely on MSNs from SEARCH results?  Let us know at meta@public-inbox.org
+sub cmd_uid_search ($$$;) {
+	my ($self, $tag) = splice(@_, 0, 2);
+	search_common($self, $tag, \@_);
+}
+
+sub cmd_search ($$$;) {
+	my ($self, $tag) = splice(@_, 0, 2);
+	search_common($self, $tag, \@_, 1);
+}
 
 sub args_ok ($$) { # duplicated from PublicInbox::NNTP
 	my ($cb, $argc) = @_;
diff --git a/t/imapd.t b/t/imapd.t
index edfc52046d2..36082d8c7ee 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -367,6 +367,44 @@ is(scalar keys %$ret, 3, 'got all 3 messages with comma-separated sequence');
 $ret = $mic->fetch_hash('1:*', 'RFC822') or BAIL_OUT "FETCH $@";
 is(scalar keys %$ret, 3, 'got all 3 messages');
 
+SKIP: {
+	# do any clients use non-UID IMAP SEARCH?
+	skip 'Xapian missing', 2 if $level eq 'basic';
+	my $x = $mic->search('all');
+	is_deeply($x, [1, 2, 3], 'MSN SEARCH works before rm');
+	$x = $mic->search(qw(header subject embedded));
+	is_deeply($x, [2], 'MSN SEARCH on Subject works before rm');
+}
+
+{
+	my $rdr = { 0 => \($ret->{1}->{RFC822}) };
+	my $env = { HOME => $ENV{HOME} };
+	my @cmd = qw(-learn rm --all);
+	run_script(\@cmd, $env, $rdr) or BAIL_OUT('-learn rm');
+}
+
+SKIP: {
+	# do any clients use non-UID IMAP SEARCH?  We only ensure
+	# MSN "SEARCH" can return a result which can be retrieved
+	# via MSN "FETCH"
+	skip 'Xapian missing', 3 if $level eq 'basic';
+	my $x = $mic->search(qw(header subject embedded));
+	is(scalar(@$x), 1, 'MSN SEARCH on Subject works after rm');
+	$x = $mic->message_string($x->[0]);
+	is($x, $ret->{2}->{RFC822}, 'message 2 unchanged');
+}
+
+# FIXME? no EXPUNGE response, yet
+my $r2 = $mic->fetch_hash('1:*', 'BODY.PEEK[]') or BAIL_OUT "FETCH $@";
+is(scalar keys %$r2, 2, 'did not get all 3 messages');
+is($r2->{2}->{'BODY[]'}, $ret->{2}->{RFC822}, 'message 2 unchanged');
+is($r2->{3}->{'BODY[]'}, $ret->{3}->{RFC822}, 'message 3 unchanged');
+$r2 = $mic->fetch_hash(2, 'BODY.PEEK[HEADER.FIELDS (message-id)]')
+			or BAIL_OUT "FETCH $@";
+is($r2->{2}->{'BODY[HEADER.FIELDS (MESSAGE-ID)]'},
+	'Message-ID: <20200418222508.GA13918@dcvr>'."\r\n\r\n",
+	'BODY.PEEK[HEADER.FIELDS ...] drops .PEEK');
+
 {
 	my @new_list = $mic->list;
 	# tag differs in [-1]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] imap: *SEARCH: use Parse::RecDescent
  2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
  2020-06-16  5:05 ` [PATCH 1/2] imap: reinstate non-UID SEARCH Eric Wong
@ 2020-06-16  5:05 ` Eric Wong
  2020-06-16  7:04 ` [PATCH 3/2] imap: *SEARCH: fix CHARSET handling Eric Wong
  2020-06-16  7:05 ` [PATCH 4/2] imap: *SEARCH: reinstate "TEXT" search-key Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-06-16  5:05 UTC (permalink / raw)
  To: meta

For properly parsing IMAP search requests, it's easier to use a
recursive descent parser generator to deal with subqueries and
the "OR" statement.

Parse::RecDescent was chosen since it's mature, well-known,
widely available and already used by our optional dependencies:
Inline::C and Mail::IMAPClient.  While it's possible to build
Xapian queries without using the Xapian string query parser,
this iteration of the IMAP parser still builds a string which is
passed to Xapian's query parser for ease of diagnostics.

Since this is a recursive descent parser dealing with untrusted
inputs, subqueries have a nesting limit of 10.  I expect that is
more than adequate for real-world use.
---
 MANIFEST                        |   2 +
 lib/PublicInbox/IMAP.pm         | 101 ++----------
 lib/PublicInbox/IMAPsearchqp.pm | 276 ++++++++++++++++++++++++++++++++
 t/imap.t                        |  18 +--
 t/imap_searchqp.t               | 105 ++++++++++++
 t/imapd-tls.t                   |   2 +-
 t/imapd.t                       |  10 +-
 xt/mem-imapd-tls.t              |   3 +-
 8 files changed, 412 insertions(+), 105 deletions(-)
 create mode 100644 lib/PublicInbox/IMAPsearchqp.pm
 create mode 100644 t/imap_searchqp.t

diff --git a/MANIFEST b/MANIFEST
index 6d94749fae7..3e7d4cc0e29 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -133,6 +133,7 @@ lib/PublicInbox/IMAP.pm
 lib/PublicInbox/IMAPClient.pm
 lib/PublicInbox/IMAPD.pm
 lib/PublicInbox/IMAPdeflate.pm
+lib/PublicInbox/IMAPsearchqp.pm
 lib/PublicInbox/Import.pm
 lib/PublicInbox/In2Tie.pm
 lib/PublicInbox/Inbox.pm
@@ -271,6 +272,7 @@ t/httpd-unix.t
 t/httpd.t
 t/hval.t
 t/imap.t
+t/imap_searchqp.t
 t/imapd-tls.t
 t/imapd.t
 t/import.t
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index 4631ea7eabc..dd983dfd282 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -34,10 +34,9 @@ use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
 use PublicInbox::GitAsyncCat;
 use Text::ParseWords qw(parse_line);
 use Errno qw(EAGAIN);
-use Time::Local qw(timegm);
-use POSIX qw(strftime);
 use Hash::Util qw(unlock_hash); # dependency of fields for perl 5.10+, anyways
 use PublicInbox::Search;
+use PublicInbox::IMAPsearchqp;
 *mdocid = \&PublicInbox::Search::mdocid;
 
 my $Address;
@@ -97,10 +96,6 @@ undef %FETCH_NEED;
 my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*';
 $valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/;
 
-my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
-my %MoY;
-@MoY{@MoY} = (0..11);
-
 # RFC 3501 5.4. Autologout Timer needs to be >= 30min
 $PublicInbox::DS::EXPTIME = 60 * 30;
 
@@ -1076,16 +1071,6 @@ sub cmd_fetch ($$$$;@) {
 	long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
 }
 
-sub parse_date ($) { # 02-Oct-1993
-	my ($date_text) = @_;
-	my ($dd, $mon, $yyyy) = split(/-/, $_[0], 3);
-	defined($yyyy) or return;
-	my $mm = $MoY{$mon} // return;
-	$dd =~ /\A[0123]?[0-9]\z/ or return;
-	$yyyy =~ /\A[0-9]{4,}\z/ or return; # Y10K-compatible!
-	timegm(0, 0, 0, $dd, $mm, $yyyy);
-}
-
 sub msn_convert ($$) {
 	my ($self, $uids) = @_;
 	my $adj = $self->{uid_base} + 1;
@@ -1168,81 +1153,20 @@ sub xap_append ($$$$) {
 	undef;
 }
 
-sub parse_query {
+sub parse_query ($$) {
 	my ($self, $rest) = @_;
 	if (uc($rest->[0]) eq 'CHARSET') {
 		shift @$rest;
 		defined(my $c = shift @$rest) or return 'BAD missing charset';
 		$c =~ /\A(?:UTF-8|US-ASCII)\z/ or return 'NO [BADCHARSET]';
 	}
-
-	my $sql = ''; # date conditions, {sql} deleted if Xapian is needed
-	my $xap = '';
-	my $q = { sql => \$sql, xap => \$xap };
-	my $msn2uid;
-	while (@$rest) {
-		my $k = uc(shift @$rest);
-		# default criteria
-		next if $k =~ /\A(?:ALL|RECENT|UNSEEN|NEW)\z/;
-		next if $k eq 'AND'; # the default, until we support OR
-		if ($k =~ $valid_range) { # convert sequence numbers to UIDs
-			msn_to_uid_range($msn2uid //= msn2uid($self), $k);
-			push @{$q->{uid}}, $k;
-		} elsif ($k eq 'UID') {
-			$k = shift(@$rest) // '';
-			$k =~ $valid_range or return 'BAD UID range';
-			push @{$q->{uid}}, $k;
-		} elsif ($k =~ /\A(?:SENT)?(?:SINCE|ON|BEFORE)\z/) {
-			my $d = parse_date(shift(@$rest) // '');
-			defined $d or return "BAD $k date format";
-			date_search($q, $k, $d);
-		} elsif ($k =~ /\A(?:SMALLER|LARGER)\z/) {
-			delete $q->{sql}; # can't use over.sqlite3
-			my $bytes = shift(@$rest) // '';
-			$bytes =~ /\A[0-9]+\z/ or return "BAD $k not a number";
-			$xap .= ' bytes:' . ($k eq 'SMALLER' ?
-							'..'.(--$bytes) :
-							(++$bytes).'..');
-		} elsif ($k eq 'HEADER') {
-			$k = uc(shift(@$rest) // '');
-			my $xk = $H2X{$k} or
-				return "BAD HEADER $k not supported";
-			my $err = xap_append($q, $rest, $k, $xk);
-			return $err if $err;
-		} elsif (defined(my $xk = $I2X{$k})) {
-			my $err = xap_append($q, $rest, $k, $xk);
-			return $err if $err;
-		} else {
-			# TODO: parentheses, OR, NOT ...
-			return "BAD $k not supported (yet?)";
-		}
-	}
-
-	# favor using over.sqlite3 if possible, since Xapian is optional
-	if (exists $q->{sql}) {
-		delete($q->{xap});
-		delete($q->{sql}) if $sql eq '';
-	} elsif (!$self->{ibx}->search) {
-		return 'BAD Xapian not configured for mailbox';
-	}
-	my $max = $self->{ibx}->over->max;
-	if (my $uid = delete $q->{uid}) {
-		my $range_csv = join(',', @$uid);
-		do {
-			my $nxt = range_step($self, \$range_csv);
-			my ($beg, $end) = @$nxt;
-			if ($xap) {
-				$xap .= " uid:$beg..$end";
-			} elsif ($beg == $end) {
-				$sql .= " AND num = $beg";
-			} else {
-				$sql .= " AND num >= $beg AND num <= $end";
-			}
-		} while ($range_csv);
+	my $q = PublicInbox::IMAPsearchqp::parse($self, join(' ', @$rest));
+	if (ref($q)) {
+		my $max = $self->{ibx}->over->max;
+		my $beg = 1;
+		uid_clamp($self, \$beg, \$max);
+		$q->{range_info} = [ $beg, $max ];
 	}
-	my $beg = 1;
-	uid_clamp($self, \$beg, \$max);
-	$q->{range_info} = [ $beg, $max ];
 	$q;
 }
 
@@ -1253,7 +1177,7 @@ sub refill_xap ($$$$) {
 	my $opt = { mset => 2, limit => 1000 };
 	my $nshard = $srch->{nshard} // 1;
 	while (1) {
-		my $mset = $srch->query("$$q uid:$beg..$end", $opt);
+		my $mset = $srch->query("$q uid:$beg..$end", $opt);
 		@$uids = map { mdocid($nshard, $_) } $mset->items;
 		if (@$uids) {
 			$range_info->[0] = $uids->[-1] + 1; # update $beg
@@ -1288,6 +1212,8 @@ sub search_common {
 		long_response($self, \&search_uid_range,
 				$tag, $sql, $range_info, $want_msn);
 	} elsif ($q = $q->{xap}) {
+		$self->{ibx}->search or
+			return "$tag BAD search not available for mailbox\r\n";
 		$self->msg_more('* SEARCH');
 		long_response($self, \&search_xap_range,
 				$tag, $q, $range_info, $want_msn);
@@ -1321,6 +1247,7 @@ sub process_line ($$) {
 
 	# TODO: IMAP allows literals for big requests to upload messages
 	# (which we don't support) but maybe some big search queries use it.
+	# RFC 3501 9 (2) doesn't permit TAB or multiple SP
 	my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l);
 	pop(@args) if (@args && !defined($args[-1]));
 	if (@args && uc($req) eq 'UID') {
@@ -1332,6 +1259,10 @@ sub process_line ($$) {
 				idle_done($self, $tag) :
 				"$idle_tag BAD expected DONE\r\n";
 		} elsif (my $cmd = $self->can('cmd_'.lc($req // ''))) {
+			if ($cmd == \&cmd_uid_search || $cmd == \&cmd_search) {
+				# preserve user-supplied quotes for search
+				(undef, @args) = split(/ search /i, $l, 2);
+			}
 			$cmd->($self, $tag, @args);
 		} else { # this is weird
 			auth_challenge_ok($self) //
diff --git a/lib/PublicInbox/IMAPsearchqp.pm b/lib/PublicInbox/IMAPsearchqp.pm
new file mode 100644
index 00000000000..fba3baccf50
--- /dev/null
+++ b/lib/PublicInbox/IMAPsearchqp.pm
@@ -0,0 +1,276 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# IMAP search query parser.  cf RFC 3501
+
+# We currently compile Xapian queries to a string which is fed
+# to Xapian's query parser.  However, we may use Xapian-provided
+# Query object API to build an optree, instead.
+package PublicInbox::IMAPsearchqp;
+use strict;
+use Parse::RecDescent;
+use Time::Local qw(timegm);
+use POSIX qw(strftime);
+our $q = bless {}, __PACKAGE__; # singleton, reachable in generated P::RD
+my @MoY = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
+my %MM = map {; $MoY[$_-1] => sprintf('%02u', $_) } (1..12);
+
+# IMAP to Xapian header search key mapping
+my %IH2X = (
+	TEXT => '',
+	SUBJECT => 's:',
+	BODY => 'b:',
+	FROM => 'f:',
+	TO => 't:',
+	CC => 'c:',
+	# BCC => 'bcc:', # TODO
+
+	# IMAP allows searching arbitrary headers via
+	# "HEADER $field_name $string" which gets silly expensive.
+	# We only allow the headers we already index.
+	'MESSAGE-ID' => 'm:',
+	'LIST-ID' => 'l:',
+	# KEYWORD # TODO ? dfpre,dfpost,...
+);
+
+sub uid_set_xap ($$) {
+	my ($self, $seq_set) = @_;
+	my @u;
+	do {
+		my $u = $self->{imap}->range_step(\$seq_set);
+		die $u unless ref($u); # break out of the parser on error
+		push @u, "uid:$u->[0]..$u->[1]";
+	} while ($seq_set);
+	push(@{$q->{xap}}, @u > 1 ? '('.join(' OR ', @u).')' : $u[0]);
+}
+
+sub xap_only ($;$) {
+	my ($self, $query) = @_;
+	delete $self->{sql}; # query too complex for over.sqlite3
+	push @{$self->{xap}}, $query if defined($query);
+
+	# looks like we can't use SQLite-only, convert SQLite UID
+	# ranges to Xapian:
+	if (my $uid = delete $self->{uid}) {
+		uid_set_xap($self, $_) for @$uid;
+	}
+	1;
+}
+
+sub ih2x {
+	my ($self, $field_name, $s) = @_; # $self == $q
+	$s =~ /\A"(.*?)"\z/s and $s = $1;
+
+	# AFAIK Xapian can't handle [*"] in probabilistic terms,
+	# and it relies on lowercase
+	my $xk = defined($field_name) ? ($IH2X{$field_name} // '') : '';
+	xap_only($self,
+		lc(join(' ', map { qq[$xk"$_"] } split(/[\*"\s]+/, $s))));
+	1;
+}
+
+sub subq_enter {
+	xap_only($q);
+	my $old = delete($q->{xap}) // [];
+	my $nr = push @{$q->{stack}}, $old;
+	die 'BAD deep recursion' if $nr > 10;
+	$q->{xap} = [];
+}
+
+sub subq_leave {
+	my $child = delete $q->{xap};
+	my $parent = $q->{xap} = pop @{$q->{stack}};
+	push(@$parent, @$child > 1 ? '('.join(' ', @$child).')' : $child->[0]);
+	1;
+}
+
+sub yyyymmdd ($) {
+	my ($item) = @_;
+	my ($dd, $mon, $yyyy) = split(/-/, $item->{date}, 3);
+	my $mm = $MM{$mon} // die "BAD month: $mon";
+	wantarray ? ($yyyy, $mm, sprintf('%02u', $dd))
+		: timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
+}
+
+sub SENTSINCE {
+	my ($self, $item) = @_;
+	my ($yyyy, $mm, $dd) = yyyymmdd($item);
+	push @{$self->{xap}}, "d:$yyyy$mm$dd..";
+	my $sql = $self->{sql} or return 1;
+	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
+	$$sql .= " AND ds >= $ds";
+}
+
+sub SENTON {
+	my ($self, $item) = @_;
+	my ($yyyy, $mm, $dd) = yyyymmdd($item);
+	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
+	my $end = $ds + 86399; # no leap day
+	my $dt_end = strftime('%Y%m%d%H%M%S', gmtime($end));
+	push @{$self->{xap}}, "dt:$yyyy$mm$dd"."000000..$dt_end";
+	my $sql = $self->{sql} or return 1;
+	$$sql .= " AND ds >= $ds AND ds <= $end";
+}
+
+sub SENTBEFORE {
+	my ($self, $item) = @_;
+	my ($yyyy, $mm, $dd) = yyyymmdd($item);
+	push @{$self->{xap}}, "d:..$yyyy$mm$dd";
+	my $sql = $self->{sql} or return 1;
+	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
+	$$sql .= " AND ds <= $ds";
+}
+
+sub ON {
+	my ($self, $item) = @_;
+	my $ts = yyyymmdd($item);
+	my $end = $ts + 86399; # no leap day
+	push @{$self->{xap}}, "ts:$ts..$end";
+	my $sql = $self->{sql} or return 1;
+	$$sql .= " AND ts >= $ts AND ts <= $end";
+}
+
+sub BEFORE {
+	my ($self, $item) = @_;
+	my $ts = yyyymmdd($item);
+	push @{$self->{xap}}, "ts:..$ts";
+	my $sql = $self->{sql} or return 1;
+	$$sql .= " AND ts <= $ts";
+}
+
+sub SINCE {
+	my ($self, $item) = @_;
+	my $ts = yyyymmdd($item);
+	push @{$self->{xap}}, "ts:$ts..";
+	my $sql = $self->{sql} or return 1;
+	$$sql .= " AND ts >= $ts";
+}
+
+sub uid_set ($$) {
+	my ($self, $seq_set) = @_;
+	if ($self->{sql}) {
+		push @{$q->{uid}}, $seq_set;
+	} else { # we've gone Xapian-only
+		uid_set_xap($self, $seq_set);
+	}
+	1;
+}
+
+sub msn_set {
+	my ($self, $seq_set) = @_;
+	PublicInbox::IMAP::msn_to_uid_range(
+		$self->{msn2uid} //= $self->{imap}->msn2uid, $seq_set);
+	uid_set($self, $seq_set);
+}
+
+my $prd = Parse::RecDescent->new(<<'EOG');
+<nocheck>
+{ my $q = $PublicInbox::IMAPsearchqp::q; }
+search_key : search_key1(s) { $return = $q }
+search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
+	| OR_search_keys
+	| NOT_search_key
+	| LARGER_number
+	| SMALLER_number
+	| SENTSINCE_date
+	| SENTON_date
+	| SENTBEFORE_date
+	| SINCE_date
+	| ON_date
+	| BEFORE_date
+	| FROM_string
+	| HEADER_field_name_string
+	| TO_string
+	| CC_string
+	| BCC_string
+	| SUBJECT_string
+	| UID_set
+	| MSN_set
+	| sub_query
+	| <error>
+
+SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
+SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
+SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }
+
+SINCE_date : 'SINCE' date { $q->SINCE(\%item) }
+ON_date : 'ON' date { $q->ON(\%item) }
+BEFORE_date : 'BEFORE' date { $q->BEFORE(\%item) }
+
+MSN_set : sequence_set { $q->msn_set($item{sequence_set}) }
+UID_set : "UID" sequence_set { $q->uid_set($item{sequence_set}) }
+LARGER_number : "LARGER" number { $q->xap_only("bytes:$item{number}..") }
+SMALLER_number : "SMALLER" number { $q->xap_only("bytes:..$item{number}") }
+# pass "NOT" through XXX is this right?
+OP_NOT : "NOT" { $q->xap_only('NOT') }
+NOT_search_key : OP_NOT search_key1
+OP_OR : "OR" {
+	$q->xap_only('OP_OR');
+	my $cur = delete $q->{xap};
+	push @{$q->{stack}}, $cur;
+	$q->{xap} = [];
+}
+search_key_a : search_key1
+{
+	my $ka = delete $q->{xap};
+	$q->{xap} = [];
+	push @{$q->{stack}}, $ka;
+}
+OR_search_keys : OP_OR search_key_a search_key1
+{
+	my $kb = delete $q->{xap};
+	my $ka = pop @{$q->{stack}};
+	my $xap = $q->{xap} = pop @{$q->{stack}};
+	my $op = pop @$xap;
+	$op eq 'OP_OR' or die "BAD expected OR: $op";
+	$ka = @$ka > 1 ? '('.join(' ', @$ka).')' : $ka->[0];
+	$kb = @$kb > 1 ? '('.join(' ', @$kb).')' : $kb->[0];
+	push @$xap, "($ka OR $kb)";
+}
+HEADER_field_name_string : "HEADER" field_name string
+{
+	$q->ih2x($item{field_name}, $item{string});
+}
+FROM_string : "FROM" string { $q->ih2x('FROM', $item{string}) }
+TO_string : "TO" string { $q->ih2x('TO', $item{string}) }
+CC_string : "CC" string { $q->ih2x('CC', $item{string}) }
+BCC_string : "BCC" string { $q->ih2x('BCC', $item{string}) }
+SUBJECT_string : "SUBJECT" string { $q->ih2x('SUBJECT', $item{string}) }
+op_subq_enter : '(' { $q->subq_enter }
+sub_query : op_subq_enter search_key1(s) ')' { $q->subq_leave }
+
+field_name : /[\x21-\x39\x3b-\x7e]+/
+string : quoted | literal
+literal : /[^"\(\) \t]+/ # bogus, I know
+quoted : /"[^"]*"/
+number : /[0-9]+/
+date : /[0123]?[0-9]-[A-Z]{3}-[0-9]{4,}/
+sequence_set : /\A[0-9][0-9,:]*[0-9\*]?\z/
+EOG
+
+sub parse {
+	my ($imap, $query) = @_;
+	my $sql = '';
+	%$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
+	# $::RD_TRACE = 1;
+	my $res = eval { $prd->search_key(uc($query)) };
+	return $@ if $@ && $@ =~ /\ABAD /;
+	return 'BAD unexpected result' if !$res || $res != $q;
+	if (exists $q->{sql}) {
+		delete $q->{xap};
+		if (my $uid = delete $q->{uid}) {
+			my @u;
+			for my $uid_set (@$uid) {
+				my $u = $q->{imap}->range_step(\$uid_set);
+				return $u if !ref($u);
+				push @u, "num >= $u->[0] AND num <= $u->[1]";
+			}
+			$sql .= ' AND ('.join(' OR ', @u).')';
+		}
+	} else {
+		$q->{xap} = join(' ', @{$q->{xap}});
+	}
+	delete @$q{qw(imap msn2uid)};
+	$q;
+}
+
+1
diff --git a/t/imap.t b/t/imap.t
index 83adf55338b..95bda4fa57c 100644
--- a/t/imap.t
+++ b/t/imap.t
@@ -5,25 +5,11 @@
 use strict;
 use Test::More;
 use PublicInbox::TestCommon;
-require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address));
+require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address
+	Parse::RecDescent));
 require_ok 'PublicInbox::IMAP';
 require_ok 'PublicInbox::IMAPD';
 require_git 2.6;
-use POSIX qw(strftime);
-
-{
-	my $parse_date = \&PublicInbox::IMAP::parse_date;
-	is(strftime('%Y-%m-%d', gmtime($parse_date->('02-Oct-1993'))),
-		'1993-10-02', 'parse_date works');
-	is(strftime('%Y-%m-%d', gmtime($parse_date->('2-Oct-1993'))),
-		'1993-10-02', 'parse_date works w/o leading zero');
-
-	is($parse_date->('2-10-1993'), undef, 'bad month');
-
-	# from what I can tell, RFC 3501 says nothing about date-month
-	# case-insensitivity, so be case-sensitive for now
-	is($parse_date->('02-oct-1993'), undef, 'case-sensitive month');
-}
 
 my ($tmpdir, $for_destroy) = tmpdir();
 my $cfgfile = "$tmpdir/config";
diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t
new file mode 100644
index 00000000000..3e4dde6ffae
--- /dev/null
+++ b/t/imap_searchqp.t
@@ -0,0 +1,105 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use Time::Local qw(timegm);
+use PublicInbox::TestCommon;
+require_mods(qw(Parse::RecDescent));
+use_ok 'PublicInbox::IMAPsearchqp';
+use_ok 'PublicInbox::IMAP';
+
+my $imap = bless {}, 'PublicInbox::IMAP';
+my $q;
+my $parse = sub { PublicInbox::IMAPsearchqp::parse($imap, $_[0]) };
+
+$q = $parse->(qq{OR HEADER TO Brian (OR FROM Ryan (OR TO Joe CC Scott))});
+is($q->{sql}, undef, 'not using SQLite for complex query');
+is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))',
+	'complex query matches Xapian query string');
+
+$q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993});
+is($q->{xap}, 'c:"b" d:19931002..', 'compound query');
+
+$q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)});
+is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens');
+
+{ # limit recursion, stack and CPU cycles ain't free
+	my $n = 10;
+	my $s = ('('x$n ). 'To a' . ( ')'x$n );
+	$q = $parse->($s);
+	is($q->{xap}, 't:"a"', 'nesting works');
+	++$n;
+	$s = ('('x$n ). 'To a' . ( ')'x$n );
+	my $err = $parse->($s);
+	like($err, qr/\ABAD /, 'reject deep nesting');
+}
+
+# IMAP has at least 6 ways of interpreting a date
+{
+	my $t0 = timegm(0, 0, 0, 2, 10 - 1, 1993);
+	my $t1 = $t0 + 86399; # no leap (day|second) support
+	my $s;
+
+	$q = $parse->($s = qq{SENTBEFORE 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ds <= $t0", 'SENTBEFORE SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, 'f:"z" d:..19931002', 'SENTBEFORE Xapian');
+
+	$q = $parse->($s = qq{SENTSINCE 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ds >= $t0", 'SENTSINCE SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, 'f:"z" d:19931002..', 'SENTSINCE Xapian');
+
+	$q = $parse->($s = qq{SENTON 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ds >= $t0 AND ds <= $t1", 'SENTON SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, 'f:"z" dt:19931002000000..19931002235959',
+		'SENTON Xapian');
+
+	$q = $parse->($s = qq{BEFORE 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, qq{f:"z" ts:..$t0}, 'BEFORE Xapian');
+
+	$q = $parse->($s = qq{SINCE 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, qq{f:"z" ts:$t0..}, 'SINCE Xapian');
+
+	$q = $parse->($s = qq{ON 2-Oct-1993});
+	is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL');
+	$q = $parse->("FROM z $s");
+	is($q->{xap}, qq{f:"z" ts:$t0..$t1}, 'ON Xapian');
+}
+
+{
+	$imap->{uo2m} = pack('S*', (1..50000));
+	$imap->{uid_base} = 50000;
+	my $err = $parse->(qq{9:});
+	my $s;
+
+	like($err, qr/\ABAD /, 'bad MSN range');
+	$err = $parse->(qq{UID 9:});
+	like($err, qr/\ABAD /, 'bad UID range');
+	$err = $parse->(qq{FROM x UID 9:});
+	like($err, qr/\ABAD /, 'bad UID range with Xapian');
+	$err = $parse->(qq{FROM x 9:});
+	like($err, qr/\ABAD /, 'bad UID range with Xapian');
+
+	$q = $parse->($s = qq{UID 50009:50099});
+	is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)',
+		'SQL generated for UID range');
+	$q = $parse->("CC x $s");
+	is($q->{xap}, qq{c:"x" uid:50009..50099},
+		'Xapian generated for UID range');
+
+	$q = $parse->($s = qq{9:99});
+	is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)',
+		'SQL generated for MSN range');
+	$q = $parse->("CC x $s");
+	is($q->{xap}, qq{c:"x" uid:50009..50099},
+		'Xapian generated for MSN range');
+}
+
+done_testing;
diff --git a/t/imapd-tls.t b/t/imapd-tls.t
index 6b3e179778c..df4ef85ca77 100644
--- a/t/imapd-tls.t
+++ b/t/imapd-tls.t
@@ -7,7 +7,7 @@ use Socket qw(IPPROTO_TCP SOL_SOCKET);
 use PublicInbox::TestCommon;
 # IO::Poll is part of the standard library, but distros may split it off...
 require_mods(qw(DBD::SQLite IO::Socket::SSL Mail::IMAPClient IO::Poll
-	Email::Address::XS||Mail::Address));
+	Email::Address::XS||Mail::Address Parse::RecDescent));
 my $imap_client = 'Mail::IMAPClient';
 $imap_client->can('starttls') or
 	plan skip_all => 'Mail::IMAPClient does not support TLS';
diff --git a/t/imapd.t b/t/imapd.t
index 36082d8c7ee..4e2c8931870 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -9,7 +9,7 @@ use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use PublicInbox::Spawn qw(which);
 require_mods(qw(DBD::SQLite Mail::IMAPClient Mail::IMAPClient::BodyStructure
-	Email::Address::XS||Mail::Address));
+	Email::Address::XS||Mail::Address Parse::RecDescent));
 my $imap_client = 'Mail::IMAPClient';
 my $can_compress = $imap_client->can('compress');
 if ($can_compress) { # hope this gets fixed upstream, soon
@@ -122,7 +122,7 @@ $ret = $mic->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@";
 is_deeply($ret, [ 1 ], 'search UID 1:* works');
 
 SKIP: {
-	skip 'Xapian missing', 6 if $level eq 'basic';
+	skip 'Xapian missing', 7 if $level eq 'basic';
 	my $x = $mic->search(qw(smaller 99999));
 	is_deeply($x, [1], 'SMALLER works with Xapian (hit)');
 	$x = $mic->search(qw(smaller 9));
@@ -137,6 +137,11 @@ SKIP: {
 	is_deeply($x, [1], 'HEADER Message-ID works');
 	$x = $mic->search(qw(HEADER Message-ID miss));
 	is_deeply($x, [], 'HEADER Message-ID can miss');
+
+	my @q = qw[OR HEADER Message-ID testmessage@example.com
+			(OR FROM Ryan (OR TO Joe CC Scott))];
+	$x = $mic->search(join(' ', @q));
+	is_deeply($x, [1], 'nested query works');
 }
 
 is_deeply(scalar $mic->flags('1'), [], '->flags works');
@@ -357,6 +362,7 @@ EOF
 	ok($mic->examine($ng), 'EXAMINE on dummy');
 	@hits = $mic->search('SENTSINCE' => '18-Apr-2020');
 	is_deeply(\@hits, [], 'search on dummy with condition works');
+	ok(!$mic->search('SENTSINCE' => '18-Abr-2020'), 'bad month fails');
 }); # each_inbox
 
 # message sequence numbers :<
diff --git a/xt/mem-imapd-tls.t b/xt/mem-imapd-tls.t
index 648a0ad3ae9..97e67d3029a 100644
--- a/xt/mem-imapd-tls.t
+++ b/xt/mem-imapd-tls.t
@@ -9,7 +9,8 @@ use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET);
 use PublicInbox::TestCommon;
 use PublicInbox::Syscall qw(:epoll);
 use PublicInbox::DS;
-require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address));
+require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address
+	Parse::RecDescent));
 my $inboxdir = $ENV{GIANT_INBOX_DIR};
 my $TEST_TLS;
 SKIP: {

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 3/2] imap: *SEARCH: fix CHARSET handling
  2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
  2020-06-16  5:05 ` [PATCH 1/2] imap: reinstate non-UID SEARCH Eric Wong
  2020-06-16  5:05 ` [PATCH 2/2] imap: *SEARCH: use Parse::RecDescent Eric Wong
@ 2020-06-16  7:04 ` Eric Wong
  2020-06-16  7:05 ` [PATCH 4/2] imap: *SEARCH: reinstate "TEXT" search-key Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-06-16  7:04 UTC (permalink / raw)
  To: meta

We no longer pass an arrayref to search_common() or
parse_query(), so handle the CHARSET directive in
the Parse::RecDescent-generated parser directly.
---
 lib/PublicInbox/IMAP.pm         | 23 +++++++++--------------
 lib/PublicInbox/IMAPsearchqp.pm |  8 ++++++--
 t/imap_searchqp.t               | 12 ++++++++++++
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index dd983dfd282..64b57a3ef69 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1154,13 +1154,8 @@ sub xap_append ($$$$) {
 }
 
 sub parse_query ($$) {
-	my ($self, $rest) = @_;
-	if (uc($rest->[0]) eq 'CHARSET') {
-		shift @$rest;
-		defined(my $c = shift @$rest) or return 'BAD missing charset';
-		$c =~ /\A(?:UTF-8|US-ASCII)\z/ or return 'NO [BADCHARSET]';
-	}
-	my $q = PublicInbox::IMAPsearchqp::parse($self, join(' ', @$rest));
+	my ($self, $query) = @_;
+	my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
 	if (ref($q)) {
 		my $max = $self->{ibx}->over->max;
 		my $beg = 1;
@@ -1202,9 +1197,9 @@ sub search_xap_range { # long_response
 }
 
 sub search_common {
-	my ($self, $tag, $rest, $want_msn) = @_;
+	my ($self, $tag, $query, $want_msn) = @_;
 	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
-	my $q = parse_query($self, $rest);
+	my $q = parse_query($self, $query);
 	return "$tag $q\r\n" if !ref($q);
 	my ($sql, $range_info) = delete @$q{qw(sql range_info)};
 	if (!scalar(keys %$q)) { # overview.sqlite3
@@ -1222,14 +1217,14 @@ sub search_common {
 	}
 }
 
-sub cmd_uid_search ($$$;) {
-	my ($self, $tag) = splice(@_, 0, 2);
-	search_common($self, $tag, \@_);
+sub cmd_uid_search ($$$) {
+	my ($self, $tag, $query) = @_;
+	search_common($self, $tag, $query);
 }
 
 sub cmd_search ($$$;) {
-	my ($self, $tag) = splice(@_, 0, 2);
-	search_common($self, $tag, \@_, 1);
+	my ($self, $tag, $query) = @_;
+	search_common($self, $tag, $query, 1);
 }
 
 sub args_ok ($$) { # duplicated from PublicInbox::NNTP
diff --git a/lib/PublicInbox/IMAPsearchqp.pm b/lib/PublicInbox/IMAPsearchqp.pm
index fba3baccf50..c9b442cb4fa 100644
--- a/lib/PublicInbox/IMAPsearchqp.pm
+++ b/lib/PublicInbox/IMAPsearchqp.pm
@@ -165,7 +165,7 @@ sub msn_set {
 my $prd = Parse::RecDescent->new(<<'EOG');
 <nocheck>
 { my $q = $PublicInbox::IMAPsearchqp::q; }
-search_key : search_key1(s) { $return = $q }
+search_key : CHARSET(?) search_key1(s) { $return = $q }
 search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
 	| OR_search_keys
 	| NOT_search_key
@@ -188,6 +188,10 @@ search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
 	| sub_query
 	| <error>
 
+charset : /\S+/
+CHARSET : 'CHARSET' charset
+{ $item{charset} =~ /\A(?:UTF-8|US-ASCII)\z/ ? 1 : die('NO [BADCHARSET]'); }
+
 SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
 SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
 SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }
@@ -253,7 +257,7 @@ sub parse {
 	%$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
 	# $::RD_TRACE = 1;
 	my $res = eval { $prd->search_key(uc($query)) };
-	return $@ if $@ && $@ =~ /\ABAD /;
+	return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
 	return 'BAD unexpected result' if !$res || $res != $q;
 	if (exists $q->{sql}) {
 		delete $q->{xap};
diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t
index 3e4dde6ffae..d73600b35d2 100644
--- a/t/imap_searchqp.t
+++ b/t/imap_searchqp.t
@@ -21,6 +21,18 @@ is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))',
 $q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993});
 is($q->{xap}, 'c:"b" d:19931002..', 'compound query');
 
+$q = $parse->(qq{CHARSET UTF-8 From b});
+is($q->{xap}, 'f:"b"', 'charset handled');
+$q = $parse->(qq{CHARSET WTF-8 From b});
+like($q, qr/\ANO \[/, 'bad charset rejected');
+{
+	# TODO: squelch errors by default? clients could flood logs
+	open my $fh, '>:scalar', \(my $buf) or die;
+	local *STDERR = $fh;
+	$q = $parse->(qq{CHARSET});
+}
+like($q, qr/\ABAD /, 'bad charset rejected');
+
 $q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)});
 is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens');
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 4/2] imap: *SEARCH: reinstate "TEXT" search-key
  2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
                   ` (2 preceding siblings ...)
  2020-06-16  7:04 ` [PATCH 3/2] imap: *SEARCH: fix CHARSET handling Eric Wong
@ 2020-06-16  7:05 ` Eric Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2020-06-16  7:05 UTC (permalink / raw)
  To: meta

I accidentally dropped "TEXT" handling while porting
the IMAP search query parser to Parse::RecDescent.
This reinstates it and adds a test to prevent future
regressions; the additional test also fixes a counting
error on systems without Xapian enabled.
---
 lib/PublicInbox/IMAPsearchqp.pm | 3 ++-
 t/imapd.t                       | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/IMAPsearchqp.pm b/lib/PublicInbox/IMAPsearchqp.pm
index c9b442cb4fa..4ea99ea500b 100644
--- a/lib/PublicInbox/IMAPsearchqp.pm
+++ b/lib/PublicInbox/IMAPsearchqp.pm
@@ -16,7 +16,6 @@ my %MM = map {; $MoY[$_-1] => sprintf('%02u', $_) } (1..12);
 
 # IMAP to Xapian header search key mapping
 my %IH2X = (
-	TEXT => '',
 	SUBJECT => 's:',
 	BODY => 'b:',
 	FROM => 'f:',
@@ -183,6 +182,7 @@ search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
 	| CC_string
 	| BCC_string
 	| SUBJECT_string
+	| TEXT_string
 	| UID_set
 	| MSN_set
 	| sub_query
@@ -239,6 +239,7 @@ TO_string : "TO" string { $q->ih2x('TO', $item{string}) }
 CC_string : "CC" string { $q->ih2x('CC', $item{string}) }
 BCC_string : "BCC" string { $q->ih2x('BCC', $item{string}) }
 SUBJECT_string : "SUBJECT" string { $q->ih2x('SUBJECT', $item{string}) }
+TEXT_string : "TEXT" string { $q->ih2x(undef, $item{string}) }
 op_subq_enter : '(' { $q->subq_enter }
 sub_query : op_subq_enter search_key1(s) ')' { $q->subq_leave }
 
diff --git a/t/imapd.t b/t/imapd.t
index 4e2c8931870..f9d93448fe5 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -398,6 +398,8 @@ SKIP: {
 	is(scalar(@$x), 1, 'MSN SEARCH on Subject works after rm');
 	$x = $mic->message_string($x->[0]);
 	is($x, $ret->{2}->{RFC822}, 'message 2 unchanged');
+	$x = $mic->search(qw(text embedded));
+	is(scalar(@$x), 1, 'MSN SEARCH on TEXT works after rm');
 }
 
 # FIXME? no EXPUNGE response, yet

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
2020-06-16  5:05 ` [PATCH 1/2] imap: reinstate non-UID SEARCH Eric Wong
2020-06-16  5:05 ` [PATCH 2/2] imap: *SEARCH: use Parse::RecDescent Eric Wong
2020-06-16  7:04 ` [PATCH 3/2] imap: *SEARCH: fix CHARSET handling Eric Wong
2020-06-16  7:05 ` [PATCH 4/2] imap: *SEARCH: reinstate "TEXT" search-key Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git