user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 3/2] imap: *SEARCH: fix CHARSET handling
Date: Tue, 16 Jun 2020 07:04:26 +0000	[thread overview]
Message-ID: <20200616070426.GA24682@dcvr> (raw)
In-Reply-To: <20200616050540.13357-1-e@yhbt.net>

We no longer pass an arrayref to search_common() or
parse_query(), so handle the CHARSET directive in
the Parse::RecDescent-generated parser directly.
---
 lib/PublicInbox/IMAP.pm         | 23 +++++++++--------------
 lib/PublicInbox/IMAPsearchqp.pm |  8 ++++++--
 t/imap_searchqp.t               | 12 ++++++++++++
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index dd983dfd282..64b57a3ef69 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -1154,13 +1154,8 @@ sub xap_append ($$$$) {
 }
 
 sub parse_query ($$) {
-	my ($self, $rest) = @_;
-	if (uc($rest->[0]) eq 'CHARSET') {
-		shift @$rest;
-		defined(my $c = shift @$rest) or return 'BAD missing charset';
-		$c =~ /\A(?:UTF-8|US-ASCII)\z/ or return 'NO [BADCHARSET]';
-	}
-	my $q = PublicInbox::IMAPsearchqp::parse($self, join(' ', @$rest));
+	my ($self, $query) = @_;
+	my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
 	if (ref($q)) {
 		my $max = $self->{ibx}->over->max;
 		my $beg = 1;
@@ -1202,9 +1197,9 @@ sub search_xap_range { # long_response
 }
 
 sub search_common {
-	my ($self, $tag, $rest, $want_msn) = @_;
+	my ($self, $tag, $query, $want_msn) = @_;
 	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
-	my $q = parse_query($self, $rest);
+	my $q = parse_query($self, $query);
 	return "$tag $q\r\n" if !ref($q);
 	my ($sql, $range_info) = delete @$q{qw(sql range_info)};
 	if (!scalar(keys %$q)) { # overview.sqlite3
@@ -1222,14 +1217,14 @@ sub search_common {
 	}
 }
 
-sub cmd_uid_search ($$$;) {
-	my ($self, $tag) = splice(@_, 0, 2);
-	search_common($self, $tag, \@_);
+sub cmd_uid_search ($$$) {
+	my ($self, $tag, $query) = @_;
+	search_common($self, $tag, $query);
 }
 
 sub cmd_search ($$$;) {
-	my ($self, $tag) = splice(@_, 0, 2);
-	search_common($self, $tag, \@_, 1);
+	my ($self, $tag, $query) = @_;
+	search_common($self, $tag, $query, 1);
 }
 
 sub args_ok ($$) { # duplicated from PublicInbox::NNTP
diff --git a/lib/PublicInbox/IMAPsearchqp.pm b/lib/PublicInbox/IMAPsearchqp.pm
index fba3baccf50..c9b442cb4fa 100644
--- a/lib/PublicInbox/IMAPsearchqp.pm
+++ b/lib/PublicInbox/IMAPsearchqp.pm
@@ -165,7 +165,7 @@ sub msn_set {
 my $prd = Parse::RecDescent->new(<<'EOG');
 <nocheck>
 { my $q = $PublicInbox::IMAPsearchqp::q; }
-search_key : search_key1(s) { $return = $q }
+search_key : CHARSET(?) search_key1(s) { $return = $q }
 search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
 	| OR_search_keys
 	| NOT_search_key
@@ -188,6 +188,10 @@ search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
 	| sub_query
 	| <error>
 
+charset : /\S+/
+CHARSET : 'CHARSET' charset
+{ $item{charset} =~ /\A(?:UTF-8|US-ASCII)\z/ ? 1 : die('NO [BADCHARSET]'); }
+
 SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
 SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
 SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }
@@ -253,7 +257,7 @@ sub parse {
 	%$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
 	# $::RD_TRACE = 1;
 	my $res = eval { $prd->search_key(uc($query)) };
-	return $@ if $@ && $@ =~ /\ABAD /;
+	return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
 	return 'BAD unexpected result' if !$res || $res != $q;
 	if (exists $q->{sql}) {
 		delete $q->{xap};
diff --git a/t/imap_searchqp.t b/t/imap_searchqp.t
index 3e4dde6ffae..d73600b35d2 100644
--- a/t/imap_searchqp.t
+++ b/t/imap_searchqp.t
@@ -21,6 +21,18 @@ is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))',
 $q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993});
 is($q->{xap}, 'c:"b" d:19931002..', 'compound query');
 
+$q = $parse->(qq{CHARSET UTF-8 From b});
+is($q->{xap}, 'f:"b"', 'charset handled');
+$q = $parse->(qq{CHARSET WTF-8 From b});
+like($q, qr/\ANO \[/, 'bad charset rejected');
+{
+	# TODO: squelch errors by default? clients could flood logs
+	open my $fh, '>:scalar', \(my $buf) or die;
+	local *STDERR = $fh;
+	$q = $parse->(qq{CHARSET});
+}
+like($q, qr/\ABAD /, 'bad charset rejected');
+
 $q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)});
 is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens');
 

  parent reply	other threads:[~2020-06-16  7:04 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-16  5:05 [PATCH 0/2] imap: search improvements Eric Wong
2020-06-16  5:05 ` [PATCH 1/2] imap: reinstate non-UID SEARCH Eric Wong
2020-06-16  5:05 ` [PATCH 2/2] imap: *SEARCH: use Parse::RecDescent Eric Wong
2020-06-16  7:04 ` Eric Wong [this message]
2020-06-16  7:05 ` [PATCH 4/2] imap: *SEARCH: reinstate "TEXT" search-key Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200616070426.GA24682@dcvr \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).