diff options
author | Eric Wong <e@80x24.org> | 2021-02-11 12:57:28 +0700 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-02-12 22:58:50 -0400 |
commit | c471b946ef629cf3db9043081a1aeaa189436f6b (patch) | |
tree | 88d9a38b81b6d32ddfba81158b004cc481607f05 | |
parent | 376778b910cdf787d6e08cfd11acab26118899f4 (diff) | |
download | public-inbox-c471b946ef629cf3db9043081a1aeaa189436f6b.tar.gz |
The cleanup doesn't seem to matter. I initially thought I needed to handle "" (two double quotes) explicitly because that's what Xapian does to escape a double quote inside a double-quoted phrase. It turns out we only need to be able to pass phrases through to Xapian unmodified, and the existing group of ["\x{201c}\x{201d}] is sufficient for our purposes.
-rw-r--r-- | lib/PublicInbox/Search.pm | 2 | ||||
-rw-r--r-- | t/search.t | 21 |
2 files changed, 22 insertions, 1 deletion
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 520aa31d..c5a1bd69 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -357,7 +357,7 @@ sub query_approxidate { my $DQ = qq<"\x{201c}\x{201d}>; # Xapian can use curly quotes $_[2] =~ tr/\x00/ /; # Xapian doesn't do NUL, we use it as a placeholder my ($terms, $phrase, $to_parse); - $_[2] =~ s{([^$DQ]*)([${DQ}][^\"]*[$DQ])?}{ + $_[2] =~ s{([^$DQ]*)([$DQ][^$DQ]*[$DQ])?}{ ($terms, $phrase) = ($1, $2); $terms =~ s!\b(d|rt|dt):(\S+)! date_parse_prepare($to_parse //= [], $1, $2)!sge; @@ -603,6 +603,27 @@ SKIP: { is($qs, qq[f:bob "hello world" d:19931002..20101002], 'post-phrase date corrected'); + # Xapian uses "" to escape " inside phrases, we don't explictly + # handle that, but are able to pass the result through unchanged + for my $pair (["\x{201c}", "\x{201d}"], ['"', '"']) { + my ($x, $y) = @$pair; + $orig = $qs = qq[${x}hello d:1993-10-02.."" world$y]; + $s->query_approxidate($g, $qs); + is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); + + $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02"); + is($tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'); + + $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y]; + $s->query_approxidate($g, $qs); + is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); + + $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02"); + is($tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'); + } + my $x_days_ago = strftime('%Y%m%d', gmtime(time - (5 * 86400))); $orig = $qs = qq[broken d:5.days.ago..]; $s->query_approxidate($g, $qs); |