user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] search: fix argv handling of quoted phrases
  2021-02-08  9:05  6% ` [PATCH 11/13] lei q: use git approxidate with d:, dt: and rt: ranges Eric Wong
@ 2021-02-10  9:59  7%   ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2021-02-10  9:59 UTC (permalink / raw)
  To: meta

This fixes both an old bug in "lei q" argv handling and one
recent regression introduced with the change to use approxidate.

Field prefixes are also handled correctly inside parenthesized
statements when the field follows "(" without a separator
character.

Fixes: fbb7ccabbf54a405 ("lei q: use git approxidate with d:, dt: and rt: ranges")
---
 lib/PublicInbox/Search.pm |  4 +++-
 t/search.t                | 11 +++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 76a270bc..b3fd532d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -330,8 +330,10 @@ sub query_argv_to_string {
 		if (s!\b(d|rt|dt):([[:print:]]+)\z!date_parse_prepare(
 						$to_parse //= [], $1, $2)!sge) {
 			$_;
+		} elsif (/\s/) {
+			s/(.*?)\b(\w+:)// ? qq{$1$2"$_"} : qq{"$_"};
 		} else {
-			/\s/ ? (s/\A(\w+:)// ? qq{$1"$_"} : qq{"$_}) : $_
+			$_
 		}
 	} @$argv);
 	# git-rev-parse can handle any number of args up to system
diff --git a/t/search.t b/t/search.t
index 36a8fb30..bcfe91f5 100644
--- a/t/search.t
+++ b/t/search.t
@@ -536,13 +536,20 @@ $ibx->with_umask(sub {
 });
 
 SKIP: {
+	my ($s, $g) = ($ibx->search, $ibx->git);
+	my $q = $s->query_argv_to_string($g, ["quoted phrase"]);
+	is($q, q["quoted phrase"], 'quoted phrase');
+	$q = $s->query_argv_to_string($g, ['s:pa ce']);
+	is($q, q[s:"pa ce"], 'space with prefix');
+	$q = $s->query_argv_to_string($g, ["\(s:pa ce", "AND", "foo\)"]);
+	is($q, q[(s:"pa ce" AND foo)], 'space AND foo');
+
 	local $ENV{TZ} = 'UTC';
 	my $now = strftime('%H:%M:%S', gmtime(time));
 	if ($now =~ /\A23:(?:59|60)/ || $now =~ /\A00:00:0[01]\z/) {
 		skip 'too close to midnight, time is tricky', 6;
 	}
-	my ($s, $g) = ($ibx->search, $ibx->git);
-	my $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]);
+	$q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]);
 	is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range');
 	$q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]);
 	is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range');

^ permalink raw reply related	[relevance 7%]

* [PATCH 11/13] lei q: use git approxidate with d:, dt: and rt: ranges
  2021-02-08  9:05  6% [PATCH 00/13] lei approxidate, startup fix, --alert Eric Wong
@ 2021-02-08  9:05  6% ` Eric Wong
  2021-02-10  9:59  7%   ` [PATCH] search: fix argv handling of quoted phrases Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2021-02-08  9:05 UTC (permalink / raw)
  To: meta

Instead of having --(sent|received)-(before|after)=s
command-line switches, we'll just try to make sense of argv so
it's usable within parenthesized statements and such.

Given the negligible performance penalty with Inline::C
process spawning, we'll probably wire this up to the
WWW interface, too.

"d:" is for mairix compatibility.  I don't know if "dt:" and
"rt:" will be too useful, but they exist because of IMAP
(and JMAP).
---
 lib/PublicInbox/LeiQuery.pm | 12 +++----
 lib/PublicInbox/Search.pm   | 67 +++++++++++++++++++++++++++++++++++++
 t/search.t                  | 44 ++++++++++++++++++++++++
 3 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 9a6fa718..d637b1ae 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -34,9 +34,10 @@ sub lei_q {
 	my @only = @{$opt->{only} // []};
 	# --local is enabled by default unless --only is used
 	# we'll allow "--only $LOCATION --local"
+	my $sto = $self->_lei_store(1);
+	my $lse = $sto->search;
 	if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
-		my $sto = $self->_lei_store(1);
-		$lxs->prepare_external($sto->search);
+		$lxs->prepare_external($lse);
 	}
 	if (@only) {
 		for my $loc (@only) {
@@ -107,12 +108,7 @@ no query allowed on command-line with --stdin
 		PublicInbox::InputPipe::consume($self->{0}, \&qstr_add, $self);
 		return;
 	}
-	# Consider spaces in argv to be for phrase search in Xapian.
-	# In other words, the users should need only care about
-	# normal shell quotes and not have to learn Xapian quoting.
-	$mset_opt{qstr} = join(' ', map {;
-		/\s/ ? (s/\A(\w+:)// ? qq{$1"$_"} : qq{"$_"}) : $_
-	} @argv);
+	$mset_opt{qstr} = $lse->query_argv_to_string($lse->git, \@argv);
 	$lxs->do_query($self);
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index dbae3bc5..f42d70e3 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -8,6 +8,7 @@ use strict;
 use parent qw(Exporter);
 our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
+use POSIX qw(strftime);
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
@@ -259,6 +260,72 @@ sub reopen {
 	$self; # make chaining easier
 }
 
+# Convert git "approxidate" ranges to something usable with our
+# Xapian indices.  At the moment, Xapian only offers a C++-only API
+# and neither the SWIG nor XS bindings allow us to use custom code
+# to parse dates (and libgit2 doesn't expose git__date_parse, either,
+# so we're running git-rev-parse(1)).
+sub date_range {
+	my ($git, $pfx, $range) = @_;
+	# are we inside a parenthesized statement?
+	my $end = $range =~ s/([\)\s]*)\z// ? $1 : '';
+	my @r = split(/\.\./, $range, 2);
+
+	# expand "d:20101002" => "d:20101002..20101003" and like
+	# n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD
+	if ($pfx eq 'd') {
+		if (!defined($r[1])) {
+			$r[0] =~ s/\A([0-9]{4})([0-9]{2})([0-9]{2})\z/$1-$2-$3/;
+			$r[0] = $git->date_parse($r[0]);
+			$r[1] = $r[0] + 86400;
+			for my $x (@r) {
+				$x = strftime('%Y%m%d', gmtime($x));
+			}
+		} else {
+			for my $x (@r) {
+				next if $x eq '' || $x =~ /\A[0-9]{8}\z/;
+				$x = strftime('%Y%m%d',
+						gmtime($git->date_parse($x)));
+			}
+		}
+	} elsif ($pfx eq 'dt') {
+		if (!defined($r[1])) { # git needs gaps and not /\d{14}/
+			$r[0] =~ s/\A([0-9]{4})([0-9]{2})([0-9]{2})
+					([0-9]{2})([0-9]{2})([0-9]{2})\z
+				/$1-$2-$3 $4:$5:$6/x;
+			$r[0] = $git->date_parse($r[0]);
+			$r[1] = $r[0] + 86400;
+			for my $x (@r) {
+				$x = strftime('%Y%m%d%H%M%S', gmtime($x));
+			}
+		} else {
+			for my $x (@r) {
+				next if $x eq '' || $x =~ /\A[0-9]{14}\z/;
+				$x = strftime('%Y%m%d%H%M%S',
+						gmtime($git->date_parse($x)));
+			}
+		}
+	} else { # "rt", let git interpret "YYYY", deal with Y10K later :P
+		for my $x (@r) {
+			next if $x eq '' || $x =~ /\A[0-9]{5,}\z/;
+			$x = $git->date_parse($x);
+		}
+		$r[1] //= $r[0] + 86400;
+	}
+	"$pfx:".join('..', @r).$end;
+}
+
+sub query_argv_to_string {
+	my (undef, $git, $argv) = @_;
+	join(' ', map {;
+		if (s!\b(d|rt|dt):(.+)\z!date_range($git, $1, $2)!sge) {
+			$_;
+		} else {
+			/\s/ ? (s/\A(\w+:)// ? qq{$1"$_"} : qq{"$_}) : $_
+		}
+	} @$argv);
+}
+
 # read-only
 sub mset {
 	my ($self, $query_string, $opts) = @_;
diff --git a/t/search.t b/t/search.t
index b2958c00..56c7db1c 100644
--- a/t/search.t
+++ b/t/search.t
@@ -9,6 +9,7 @@ require PublicInbox::SearchIdx;
 require PublicInbox::Inbox;
 require PublicInbox::InboxWritable;
 use PublicInbox::Eml;
+use POSIX qw(strftime);
 my ($tmpdir, $for_destroy) = tmpdir();
 my $git_dir = "$tmpdir/a.git";
 my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir });
@@ -534,4 +535,47 @@ $ibx->with_umask(sub {
 		'Subject search reaches inside message/rfc822');
 });
 
+SKIP: {
+	local $ENV{TZ} = 'UTC';
+	my $now = strftime('%H:%M:%S', gmtime(time));
+	if ($now =~ /\A23:(?:59|60)/ || $now =~ /\A00:00:0[01]\z/) {
+		skip 'too close to midnight, time is tricky', 6;
+	}
+	my ($s, $g) = ($ibx->search, $ibx->git);
+	my $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]);
+	is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range');
+	$q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]);
+	is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range');
+	$q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]);
+	$q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q");
+	is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand');
+	$q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]);
+	$q =~ /\Art:\.\.(\d+) zz/ or fail("rt: expansion failed: $q");
+	is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: end expand');
+	$q = $s->query_argv_to_string($g, [qw(something dt:2010-10-02..)]);
+	like($q, qr/\Asomething dt:20101002\d{6}\.\./, 'dt: expansion');
+	$q = $s->query_argv_to_string($g, [qw(x d:yesterday.. y)]);
+	is($q, strftime('x d:%Y%m%d.. y', gmtime(time - 86400)),
+		'"yesterday" handled');
+	$q = $s->query_argv_to_string($g, [qw(x dt:20101002054123)]);
+	is($q, 'x dt:20101002054123..20101003054123', 'single dt: expanded');
+	$q = $s->query_argv_to_string($g, [qw(x dt:2010-10-02T05:41:23Z)]);
+	is($q, 'x dt:20101002054123..20101003054123', 'ISO8601 dt: expanded');
+	$q = $s->query_argv_to_string($g, [qw(rt:1970..1971)]);
+	$q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY rt: expansion: $q";
+	my ($beg, $end) = ($1, $2);
+	is(strftime('%Y', gmtime($beg)), 1970, 'rt: starts at 1970');
+	is(strftime('%Y', gmtime($end)), 1971, 'rt: ends at 1971');
+	$q = $s->query_argv_to_string($g, [qw(rt:1970-01-01)]);
+	$q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY-MM-DD rt: expansion: $q";
+	($beg, $end) = ($1, $2);
+	is(strftime('%Y-%m-%d', gmtime($beg)), '1970-01-01',
+			'rt: date-only w/o range');
+	is(strftime('%Y-%m-%d', gmtime($end)), '1970-01-02',
+			'rt: date-only auto-end');
+	$q = $s->query_argv_to_string($g, [qw{OR (rt:1993-10-02)}]);
+	like($q, qr/\AOR \(rt:749\d{6}\.\.749\d{6}\)\z/,
+		'trailing parentheses preserved');
+}
+
 done_testing();

^ permalink raw reply related	[relevance 6%]

* [PATCH 00/13] lei approxidate, startup fix, --alert
@ 2021-02-08  9:05  6% Eric Wong
  2021-02-08  9:05  6% ` [PATCH 11/13] lei q: use git approxidate with d:, dt: and rt: ranges Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2021-02-08  9:05 UTC (permalink / raw)
  To: meta

I've redone and squashed some changes into PATCH 1/13 which
was posted yesterday.

3/13 (SIGWINCH) needed a rebase after the changes to 1/13,
4/13 (--alert=CMD) is a generalized take on 3/13.

12/13 is...

Eric Wong (13):
  lei q: improve remote mboxrd UX + MUA
  lei_xsearch: quiet Eml warnings from remote mboxrds
  lei q: SIGWINCH process group with the terminal
  lei q: support --alert=CMD for early MUA users
  tests: favor IPv6
  ds: improve add_timer usability
  lei: start_pager: drop COLUMNS default
  lei: avoid racing on unlink + bind + listen
  lei: drop BSD::Resource usage
  git: implement date_parse method
  lei q: use git approxidate with d:, dt: and rt: ranges
  search: use one git-rev-parse process for all dates
  spawnpp: raise exception on E2BIG errors

 lib/PublicInbox/DS.pm           |  10 ++--
 lib/PublicInbox/ExtSearchIdx.pm |   5 +-
 lib/PublicInbox/FakeInotify.pm  |   4 +-
 lib/PublicInbox/Git.pm          |  10 +++-
 lib/PublicInbox/IPC.pm          |   8 +--
 lib/PublicInbox/LEI.pm          | 100 ++++++++++++++++++++++----------
 lib/PublicInbox/LeiCurl.pm      |  11 +++-
 lib/PublicInbox/LeiMirror.pm    |   5 +-
 lib/PublicInbox/LeiOverview.pm  |   6 +-
 lib/PublicInbox/LeiQuery.pm     |  12 ++--
 lib/PublicInbox/LeiToMail.pm    |  24 ++++----
 lib/PublicInbox/LeiXSearch.pm   |  97 ++++++++++++++++++++-----------
 lib/PublicInbox/Search.pm       |  86 +++++++++++++++++++++++++++
 lib/PublicInbox/SpawnPP.pm      |  23 ++++++--
 lib/PublicInbox/TestCommon.pm   |  30 ++++++++--
 lib/PublicInbox/Watch.pm        |  19 +++---
 script/lei                      |  16 ++---
 t/extsearch.t                   |   2 +-
 t/git.t                         |  17 +++++-
 t/httpd-corner.psgi             |   2 +-
 t/httpd-corner.t                |  12 ++--
 t/httpd-https.t                 |   2 +-
 t/httpd-unix.t                  |   7 +--
 t/httpd.t                       |   8 +--
 t/imapd-tls.t                   |   4 +-
 t/imapd.t                       |   8 +--
 t/lei-mirror.t                  |   2 +-
 t/nntpd-tls.t                   |   4 +-
 t/nntpd.t                       |  11 ++--
 t/psgi_attach.t                 |   2 +-
 t/psgi_v2.t                     |   2 +-
 t/search.t                      |  51 ++++++++++++++++
 t/solver_git.t                  |   2 +-
 t/v2mirror.t                    |   3 +-
 t/v2writable.t                  |   3 +-
 t/www_altid.t                   |   2 +-
 t/www_listing.t                 |   3 +-
 xt/git-http-backend.t           |   4 +-
 xt/httpd-async-stream.t         |   2 +-
 xt/imapd-mbsync-oimap.t         |   4 +-
 xt/imapd-validate.t             |   4 +-
 xt/mem-imapd-tls.t              |   2 +-
 xt/nntpd-validate.t             |   3 +-
 xt/perf-nntpd.t                 |  16 ++---
 xt/solver.t                     |   3 +-
 45 files changed, 441 insertions(+), 210 deletions(-)


^ permalink raw reply	[relevance 6%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-02-08  9:05  6% [PATCH 00/13] lei approxidate, startup fix, --alert Eric Wong
2021-02-08  9:05  6% ` [PATCH 11/13] lei q: use git approxidate with d:, dt: and rt: ranges Eric Wong
2021-02-10  9:59  7%   ` [PATCH] search: fix argv handling of quoted phrases Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).