diff options
author | Eric Wong <e@yhbt.net> | 2020-06-16 05:05:40 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-06-16 21:41:56 +0000 |
commit | 07caa8528db2ac22d81a0763e1cefd59acd827f1 (patch) | |
tree | aae828ad005dc59ad66d9e7d620d81bf669ba796 /t | |
parent | 95efed60fe2d20ee4382c485e7faf58b3fee25af (diff) | |
download | public-inbox-07caa8528db2ac22d81a0763e1cefd59acd827f1.tar.gz |
To parse IMAP search requests properly, it's easier to use a recursive descent parser generator to deal with subqueries and the "OR" statement. Parse::RecDescent was chosen since it's mature, well-known, widely available and already used by our optional dependencies: Inline::C and Mail::IMAPClient. While it's possible to build Xapian queries without using the Xapian string query parser, this iteration of the IMAP parser still builds a string which is passed to Xapian's query parser for ease of diagnostics. Since this is a recursive descent parser dealing with untrusted inputs, subqueries have a nesting limit of 10. I expect that is more than adequate for real-world use.
Diffstat (limited to 't')
-rw-r--r-- | t/imap.t | 18 | ||||
-rw-r--r-- | t/imap_searchqp.t | 105 | ||||
-rw-r--r-- | t/imapd-tls.t | 2 | ||||
-rw-r--r-- | t/imapd.t | 10 |
4 files changed, 116 insertions, 19 deletions
@@ -5,25 +5,11 @@ use strict; use Test::More; use PublicInbox::TestCommon; -require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address)); +require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address + Parse::RecDescent)); require_ok 'PublicInbox::IMAP'; require_ok 'PublicInbox::IMAPD'; require_git 2.6; -use POSIX qw(strftime); - -{ - my $parse_date = \&PublicInbox::IMAP::parse_date; - is(strftime('%Y-%m-%d', gmtime($parse_date->('02-Oct-1993'))), - '1993-10-02', 'parse_date works'); - is(strftime('%Y-%m-%d', gmtime($parse_date->('2-Oct-1993'))), - '1993-10-02', 'parse_date works w/o leading zero'); - - is($parse_date->('2-10-1993'), undef, 'bad month'); - - # from what I can tell, RFC 3501 says nothing about date-month - # case-insensitivity, so be case-sensitive for now - is($parse_date->('02-oct-1993'), undef, 'case-sensitive month'); -} my ($tmpdir, $for_destroy) = tmpdir(); my $cfgfile = "$tmpdir/config"; diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t new file mode 100644 index 00000000..3e4dde6f --- /dev/null +++ b/t/imap_searchqp.t @@ -0,0 +1,105 @@ +#!perl -w +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use Test::More; +use Time::Local qw(timegm); +use PublicInbox::TestCommon; +require_mods(qw(Parse::RecDescent)); +use_ok 'PublicInbox::IMAPsearchqp'; +use_ok 'PublicInbox::IMAP'; + +my $imap = bless {}, 'PublicInbox::IMAP'; +my $q; +my $parse = sub { PublicInbox::IMAPsearchqp::parse($imap, $_[0]) }; + +$q = $parse->(qq{OR HEADER TO Brian (OR FROM Ryan (OR TO Joe CC Scott))}); +is($q->{sql}, undef, 'not using SQLite for complex query'); +is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))', + 'complex query matches Xapian query string'); + +$q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993}); +is($q->{xap}, 'c:"b" d:19931002..', 'compound query'); + +$q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)}); +is($q->{xap}, 'c:"b" 
d:..19931002', 'compound query w/ parens'); + +{ # limit recursion, stack and CPU cycles ain't free + my $n = 10; + my $s = ('('x$n ). 'To a' . ( ')'x$n ); + $q = $parse->($s); + is($q->{xap}, 't:"a"', 'nesting works'); + ++$n; + $s = ('('x$n ). 'To a' . ( ')'x$n ); + my $err = $parse->($s); + like($err, qr/\ABAD /, 'reject deep nesting'); +} + +# IMAP has at least 6 ways of interpreting a date +{ + my $t0 = timegm(0, 0, 0, 2, 10 - 1, 1993); + my $t1 = $t0 + 86399; # no leap (day|second) support + my $s; + + $q = $parse->($s = qq{SENTBEFORE 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ds <= $t0", 'SENTBEFORE SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, 'f:"z" d:..19931002', 'SENTBEFORE Xapian'); + + $q = $parse->($s = qq{SENTSINCE 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ds >= $t0", 'SENTSINCE SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, 'f:"z" d:19931002..', 'SENTSINCE Xapian'); + + $q = $parse->($s = qq{SENTON 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ds >= $t0 AND ds <= $t1", 'SENTON SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, 'f:"z" dt:19931002000000..19931002235959', + 'SENTON Xapian'); + + $q = $parse->($s = qq{BEFORE 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, qq{f:"z" ts:..$t0}, 'BEFORE Xapian'); + + $q = $parse->($s = qq{SINCE 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, qq{f:"z" ts:$t0..}, 'SINCE Xapian'); + + $q = $parse->($s = qq{ON 2-Oct-1993}); + is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL'); + $q = $parse->("FROM z $s"); + is($q->{xap}, qq{f:"z" ts:$t0..$t1}, 'ON Xapian'); +} + +{ + $imap->{uo2m} = pack('S*', (1..50000)); + $imap->{uid_base} = 50000; + my $err = $parse->(qq{9:}); + my $s; + + like($err, qr/\ABAD /, 'bad MSN range'); + $err = $parse->(qq{UID 9:}); + like($err, qr/\ABAD /, 'bad UID range'); + $err = $parse->(qq{FROM x UID 9:}); + like($err, 
qr/\ABAD /, 'bad UID range with Xapian'); + $err = $parse->(qq{FROM x 9:}); + like($err, qr/\ABAD /, 'bad UID range with Xapian'); + + $q = $parse->($s = qq{UID 50009:50099}); + is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', + 'SQL generated for UID range'); + $q = $parse->("CC x $s"); + is($q->{xap}, qq{c:"x" uid:50009..50099}, + 'Xapian generated for UID range'); + + $q = $parse->($s = qq{9:99}); + is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', + 'SQL generated for MSN range'); + $q = $parse->("CC x $s"); + is($q->{xap}, qq{c:"x" uid:50009..50099}, + 'Xapian generated for MSN range'); +} + +done_testing; diff --git a/t/imapd-tls.t b/t/imapd-tls.t index 6b3e1797..df4ef85c 100644 --- a/t/imapd-tls.t +++ b/t/imapd-tls.t @@ -7,7 +7,7 @@ use Socket qw(IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll is part of the standard library, but distros may split it off... require_mods(qw(DBD::SQLite IO::Socket::SSL Mail::IMAPClient IO::Poll - Email::Address::XS||Mail::Address)); + Email::Address::XS||Mail::Address Parse::RecDescent)); my $imap_client = 'Mail::IMAPClient'; $imap_client->can('starttls') or plan skip_all => 'Mail::IMAPClient does not support TLS'; @@ -9,7 +9,7 @@ use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::Spawn qw(which); require_mods(qw(DBD::SQLite Mail::IMAPClient Mail::IMAPClient::BodyStructure - Email::Address::XS||Mail::Address)); + Email::Address::XS||Mail::Address Parse::RecDescent)); my $imap_client = 'Mail::IMAPClient'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon @@ -122,7 +122,7 @@ $ret = $mic->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:* works'); SKIP: { - skip 'Xapian missing', 6 if $level eq 'basic'; + skip 'Xapian missing', 7 if $level eq 'basic'; my $x = $mic->search(qw(smaller 99999)); is_deeply($x, [1], 'SMALLER works with Xapian (hit)'); $x = $mic->search(qw(smaller 
9)); @@ -137,6 +137,11 @@ SKIP: { is_deeply($x, [1], 'HEADER Message-ID works'); $x = $mic->search(qw(HEADER Message-ID miss)); is_deeply($x, [], 'HEADER Message-ID can miss'); + + my @q = qw[OR HEADER Message-ID testmessage@example.com + (OR FROM Ryan (OR TO Joe CC Scott))]; + $x = $mic->search(join(' ', @q)); + is_deeply($x, [1], 'nested query works'); } is_deeply(scalar $mic->flags('1'), [], '->flags works'); @@ -357,6 +362,7 @@ EOF ok($mic->examine($ng), 'EXAMINE on dummy'); @hits = $mic->search('SENTSINCE' => '18-Apr-2020'); is_deeply(\@hits, [], 'search on dummy with condition works'); + ok(!$mic->search('SENTSINCE' => '18-Abr-2020'), 'bad month fails'); }); # each_inbox # message sequence numbers :< |