about summary refs log tree commit homepage
path: root/t
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-06-16 05:05:40 +0000
committer Eric Wong <e@yhbt.net> 2020-06-16 21:41:56 +0000
commit 07caa8528db2ac22d81a0763e1cefd59acd827f1 (patch)
tree aae828ad005dc59ad66d9e7d620d81bf669ba796 /t
parent 95efed60fe2d20ee4382c485e7faf58b3fee25af (diff)
download public-inbox-07caa8528db2ac22d81a0763e1cefd59acd827f1.tar.gz
For properly parsing IMAP search requests, it's easier to use a
recursive descent parser generator to deal with subqueries and
the "OR" statement.

Parse::RecDescent was chosen since it's mature, well-known,
widely available and already used by our optional dependencies:
Inline::C and Mail::IMAPClient.  While it's possible to build
Xapian queries without using the Xapian string query parser,
this iteration of the IMAP parser still builds a string which is
passed to Xapian's query parser for ease-of-diagnostics.

Since this is a recursive descent parser dealing with untrusted
inputs, subqueries have a nesting limit of 10.  I expect that is
more than adequate for real-world use.
Diffstat (limited to 't')
-rw-r--r-- t/imap.t 18
-rw-r--r-- t/imap_searchqp.t 105
-rw-r--r-- t/imapd-tls.t 2
-rw-r--r-- t/imapd.t 10
4 files changed, 116 insertions, 19 deletions
diff --git a/t/imap.t b/t/imap.t
index 83adf553..95bda4fa 100644
--- a/t/imap.t
+++ b/t/imap.t
@@ -5,25 +5,11 @@
 use strict;
 use Test::More;
 use PublicInbox::TestCommon;
-require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address));
+require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address
+        Parse::RecDescent));
 require_ok 'PublicInbox::IMAP';
 require_ok 'PublicInbox::IMAPD';
 require_git 2.6;
-use POSIX qw(strftime);
-
-{
-        my $parse_date = \&PublicInbox::IMAP::parse_date;
-        is(strftime('%Y-%m-%d', gmtime($parse_date->('02-Oct-1993'))),
-                '1993-10-02', 'parse_date works');
-        is(strftime('%Y-%m-%d', gmtime($parse_date->('2-Oct-1993'))),
-                '1993-10-02', 'parse_date works w/o leading zero');
-
-        is($parse_date->('2-10-1993'), undef, 'bad month');
-
-        # from what I can tell, RFC 3501 says nothing about date-month
-        # case-insensitivity, so be case-sensitive for now
-        is($parse_date->('02-oct-1993'), undef, 'case-sensitive month');
-}
 
 my ($tmpdir, $for_destroy) = tmpdir();
 my $cfgfile = "$tmpdir/config";
diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t
new file mode 100644
index 00000000..3e4dde6f
--- /dev/null
+++ b/t/imap_searchqp.t
@@ -0,0 +1,105 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use Time::Local qw(timegm);
+use PublicInbox::TestCommon;
+require_mods(qw(Parse::RecDescent));
+use_ok 'PublicInbox::IMAPsearchqp';
+use_ok 'PublicInbox::IMAP';
+
+my $imap = bless {}, 'PublicInbox::IMAP';
+my $q;
+my $parse = sub { PublicInbox::IMAPsearchqp::parse($imap, $_[0]) };
+
+$q = $parse->(qq{OR HEADER TO Brian (OR FROM Ryan (OR TO Joe CC Scott))});
+is($q->{sql}, undef, 'not using SQLite for complex query');
+is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))',
+        'complex query matches Xapian query string');
+
+$q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993});
+is($q->{xap}, 'c:"b" d:19931002..', 'compound query');
+
+$q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)});
+is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens');
+
+{ # limit recursion, stack and CPU cycles ain't free
+        my $n = 10;
+        my $s = ('('x$n ). 'To a' . ( ')'x$n );
+        $q = $parse->($s);
+        is($q->{xap}, 't:"a"', 'nesting works');
+        ++$n;
+        $s = ('('x$n ). 'To a' . ( ')'x$n );
+        my $err = $parse->($s);
+        like($err, qr/\ABAD /, 'reject deep nesting');
+}
+
+# IMAP has at least 6 ways of interpreting a date
+{
+        my $t0 = timegm(0, 0, 0, 2, 10 - 1, 1993);
+        my $t1 = $t0 + 86399; # no leap (day|second) support
+        my $s;
+
+        $q = $parse->($s = qq{SENTBEFORE 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ds <= $t0", 'SENTBEFORE SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, 'f:"z" d:..19931002', 'SENTBEFORE Xapian');
+
+        $q = $parse->($s = qq{SENTSINCE 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ds >= $t0", 'SENTSINCE SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, 'f:"z" d:19931002..', 'SENTSINCE Xapian');
+
+        $q = $parse->($s = qq{SENTON 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ds >= $t0 AND ds <= $t1", 'SENTON SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, 'f:"z" dt:19931002000000..19931002235959',
+                'SENTON Xapian');
+
+        $q = $parse->($s = qq{BEFORE 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, qq{f:"z" ts:..$t0}, 'BEFORE Xapian');
+
+        $q = $parse->($s = qq{SINCE 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, qq{f:"z" ts:$t0..}, 'SINCE Xapian');
+
+        $q = $parse->($s = qq{ON 2-Oct-1993});
+        is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL');
+        $q = $parse->("FROM z $s");
+        is($q->{xap}, qq{f:"z" ts:$t0..$t1}, 'ON Xapian');
+}
+
+{
+        $imap->{uo2m} = pack('S*', (1..50000));
+        $imap->{uid_base} = 50000;
+        my $err = $parse->(qq{9:});
+        my $s;
+
+        like($err, qr/\ABAD /, 'bad MSN range');
+        $err = $parse->(qq{UID 9:});
+        like($err, qr/\ABAD /, 'bad UID range');
+        $err = $parse->(qq{FROM x UID 9:});
+        like($err, qr/\ABAD /, 'bad UID range with Xapian');
+        $err = $parse->(qq{FROM x 9:});
+        like($err, qr/\ABAD /, 'bad UID range with Xapian');
+
+        $q = $parse->($s = qq{UID 50009:50099});
+        is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)',
+                'SQL generated for UID range');
+        $q = $parse->("CC x $s");
+        is($q->{xap}, qq{c:"x" uid:50009..50099},
+                'Xapian generated for UID range');
+
+        $q = $parse->($s = qq{9:99});
+        is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)',
+                'SQL generated for MSN range');
+        $q = $parse->("CC x $s");
+        is($q->{xap}, qq{c:"x" uid:50009..50099},
+                'Xapian generated for MSN range');
+}
+
+done_testing;
diff --git a/t/imapd-tls.t b/t/imapd-tls.t
index 6b3e1797..df4ef85c 100644
--- a/t/imapd-tls.t
+++ b/t/imapd-tls.t
@@ -7,7 +7,7 @@ use Socket qw(IPPROTO_TCP SOL_SOCKET);
 use PublicInbox::TestCommon;
 # IO::Poll is part of the standard library, but distros may split it off...
 require_mods(qw(DBD::SQLite IO::Socket::SSL Mail::IMAPClient IO::Poll
-        Email::Address::XS||Mail::Address));
+        Email::Address::XS||Mail::Address Parse::RecDescent));
 my $imap_client = 'Mail::IMAPClient';
 $imap_client->can('starttls') or
         plan skip_all => 'Mail::IMAPClient does not support TLS';
diff --git a/t/imapd.t b/t/imapd.t
index 36082d8c..4e2c8931 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -9,7 +9,7 @@ use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use PublicInbox::Spawn qw(which);
 require_mods(qw(DBD::SQLite Mail::IMAPClient Mail::IMAPClient::BodyStructure
-        Email::Address::XS||Mail::Address));
+        Email::Address::XS||Mail::Address Parse::RecDescent));
 my $imap_client = 'Mail::IMAPClient';
 my $can_compress = $imap_client->can('compress');
 if ($can_compress) { # hope this gets fixed upstream, soon
@@ -122,7 +122,7 @@ $ret = $mic->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@";
 is_deeply($ret, [ 1 ], 'search UID 1:* works');
 
 SKIP: {
-        skip 'Xapian missing', 6 if $level eq 'basic';
+        skip 'Xapian missing', 7 if $level eq 'basic';
         my $x = $mic->search(qw(smaller 99999));
         is_deeply($x, [1], 'SMALLER works with Xapian (hit)');
         $x = $mic->search(qw(smaller 9));
@@ -137,6 +137,11 @@ SKIP: {
         is_deeply($x, [1], 'HEADER Message-ID works');
         $x = $mic->search(qw(HEADER Message-ID miss));
         is_deeply($x, [], 'HEADER Message-ID can miss');
+
+        my @q = qw[OR HEADER Message-ID testmessage@example.com
+                        (OR FROM Ryan (OR TO Joe CC Scott))];
+        $x = $mic->search(join(' ', @q));
+        is_deeply($x, [1], 'nested query works');
 }
 
 is_deeply(scalar $mic->flags('1'), [], '->flags works');
@@ -357,6 +362,7 @@ EOF
         ok($mic->examine($ng), 'EXAMINE on dummy');
         @hits = $mic->search('SENTSINCE' => '18-Apr-2020');
         is_deeply(\@hits, [], 'search on dummy with condition works');
+        ok(!$mic->search('SENTSINCE' => '18-Abr-2020'), 'bad month fails');
 }); # each_inbox
 
 # message sequence numbers :<