From c471b946ef629cf3db9043081a1aeaa189436f6b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 11 Feb 2021 12:57:28 +0700 Subject: search: query_approxidate: cleanup regexp, more tests The cleanup doesn't seem to matter, I initially thought I needed to handle "" (two double quotes) explicitly because that's what Xapian does to escape a double quote inside a double-quoted phrase. It turns out we only need to be able to pass phrases through to Xapian unmodified, and the existing group of ["\x{201c}\x{201d}] is sufficient for our purposes. --- lib/PublicInbox/Search.pm | 2 +- t/search.t | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 520aa31d..c5a1bd69 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -357,7 +357,7 @@ sub query_approxidate { my $DQ = qq<"\x{201c}\x{201d}>; # Xapian can use curly quotes $_[2] =~ tr/\x00/ /; # Xapian doesn't do NUL, we use it as a placeholder my ($terms, $phrase, $to_parse); - $_[2] =~ s{([^$DQ]*)([${DQ}][^\"]*[$DQ])?}{ + $_[2] =~ s{([^$DQ]*)([$DQ][^$DQ]*[$DQ])?}{ ($terms, $phrase) = ($1, $2); $terms =~ s!\b(d|rt|dt):(\S+)! 
date_parse_prepare($to_parse //= [], $1, $2)!sge; diff --git a/t/search.t b/t/search.t index effba1df..124c9acf 100644 --- a/t/search.t +++ b/t/search.t @@ -603,6 +603,27 @@ SKIP: { is($qs, qq[f:bob "hello world" d:19931002..20101002], 'post-phrase date corrected'); + # Xapian uses "" to escape " inside phrases, we don't explicitly + # handle that, but are able to pass the result through unchanged + for my $pair (["\x{201c}", "\x{201d}"], ['"', '"']) { + my ($x, $y) = @$pair; + $orig = $qs = qq[${x}hello d:1993-10-02.."" world$y]; + $s->query_approxidate($g, $qs); + is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); + + $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02"); + is($tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'); + + $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y]; + $s->query_approxidate($g, $qs); + is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); + + $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02"); + is($tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'); + } + my $x_days_ago = strftime('%Y%m%d', gmtime(time - (5 * 86400))); $orig = $qs = qq[broken d:5.days.ago..]; $s->query_approxidate($g, $qs); -- cgit v1.2.3-24-ge0c7