* [PATCH 1/3] search: make xap_terms easier-to-use and use it more
2021-06-23 11:14 [PATCH 0/3] some WWW search things Eric Wong
@ 2021-06-23 11:14 ` Eric Wong
2021-06-23 11:14 ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
2021-06-23 11:14 ` [PATCH 3/3] www: do not warn on blank query parameters Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
To: meta
This allows us to simplify callers throughout, and exceptions
can no longer be silently hidden. MiscSearch now uses xap_terms
for looking up eidx_key terms for a code reduction.
We also simplify LeiStore->_msg_kw for runtime use by moving the
MsetIterator handling into the t/lei_store.t test case.
---
lib/PublicInbox/LeiSearch.pm | 16 +++++++---------
lib/PublicInbox/LeiXSearch.pm | 4 ++--
lib/PublicInbox/MiscSearch.pm | 23 +++++++----------------
lib/PublicInbox/Search.pm | 22 +++++++++-------------
lib/PublicInbox/SearchIdx.pm | 5 +++--
t/lei_store.t | 3 ++-
6 files changed, 30 insertions(+), 43 deletions(-)
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index d0963e92..06ea6299 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -19,16 +19,13 @@ sub num2docid ($$) {
}
sub _msg_kw { # retry_reopen callback
- my ($self, $num) = @_; # num_or_mitem
- my $xdb = $self->xdb; # set {nshard};
- my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
- my $kw = xap_terms('K', $xdb, $docid);
- warn "E: #$docid ($num): $@\n" if $@;
- wantarray ? sort(keys(%$kw)) : $kw;
+ my ($self, $num) = @_;
+ my $xdb = $self->xdb; # set {nshard} for num2docid;
+ xap_terms('K', $xdb, num2docid($self, $num));
}
-sub msg_keywords {
- my ($self, $num) = @_; # num_or_mitem
+sub msg_keywords { # array or hashref
+ my ($self, $num) = @_;
$self->retry_reopen(\&_msg_kw, $num);
}
@@ -138,7 +135,8 @@ sub kw_changed {
$docids //= [];
@$docids = sort { $a <=> $b } values %$xoids;
}
- my $cur_kw = msg_keywords($self, $docids->[0]);
+ my $cur_kw = eval { msg_keywords($self, $docids->[0]) };
+ die "E: #$docids->[0] keyword lookup failure: $@\n" if $@;
# RFC 5550 sec 5.9 on the $Forwarded keyword states:
# "Once set, the flag SHOULD NOT be cleared"
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index beb955bb..cac7fb7d 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -71,11 +71,11 @@ sub _mitem_kw { # retry_reopen callback
my $doc = $mitem->get_document;
my $kw = xap_terms('K', $doc);
$kw->{flagged} = 1 if $flagged;
- my $L = xap_terms('L', $doc);
+ my @L = xap_terms('L', $doc);
# we keep the empty {kw} array here to prevent expensive work in
# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
$smsg->{kw} = [ sort keys %$kw ];
- $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
+ $smsg->{L} = \@L if scalar(@L);
}
sub mitem_kw ($$$;$) {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index ead9a278..4e010453 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -5,7 +5,7 @@
package PublicInbox::MiscSearch;
use strict;
use v5.10.1;
-use PublicInbox::Search qw(retry_reopen int_val);
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
my $json;
# Xapian value columns:
@@ -90,15 +90,10 @@ sub ibx_matches_once { # retry_reopen callback
while (1) {
my $mset = misc_enquire_once($self, $qr, $opt);
for my $mi ($mset->items) {
- my $doc = $mi->get_document;
- my $end = $doc->termlist_end;
- my $cur = $doc->termlist_begin;
- $cur->skip_to('Q');
- if ($cur != $end) {
- my $ng = $cur->get_termname; # eidx_key
- $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
- if (my $ibx = $by_newsgroup->{$ng}) {
- $ret->{$ng} = $ibx;
+ my ($eidx_key) = xap_terms('Q', $mi->get_document);
+ if (defined($eidx_key)) {
+ if (my $ibx = $by_newsgroup->{$eidx_key}) {
+ $ret->{$eidx_key} = $ibx;
}
} else {
warn <<EOF;
@@ -144,12 +139,8 @@ sub inbox_data {
sub ibx_cache_load {
my ($doc, $cache) = @_;
- my $end = $doc->termlist_end;
- my $cur = $doc->termlist_begin;
- $cur->skip_to('Q');
- return if $cur == $end;
- my $eidx_key = $cur->get_termname;
- $eidx_key =~ s/\AQ// or return; # expired
+ my ($eidx_key) = xap_terms('Q', $doc);
+ return unless defined($eidx_key); # expired
my $ce = $cache->{$eidx_key} = {};
$ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
$ce->{-modified} = int_val($doc, $MODIFIED);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 59a5a3b0..7e19e616 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -557,19 +557,15 @@ sub get_pct ($) { # mset item
sub xap_terms ($$;@) {
my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
my %ret;
- eval {
- my $end = $xdb_or_doc->termlist_end(@docid);
- my $cur = $xdb_or_doc->termlist_begin(@docid);
- for (; $cur != $end; $cur++) {
- $cur->skip_to($pfx);
- last if $cur == $end;
- my $tn = $cur->get_termname;
- if (index($tn, $pfx) == 0) {
- $ret{substr($tn, length($pfx))} = undef;
- }
- }
- };
- \%ret;
+ my $end = $xdb_or_doc->termlist_end(@docid);
+ my $cur = $xdb_or_doc->termlist_begin(@docid);
+ for (; $cur != $end; $cur++) {
+ $cur->skip_to($pfx);
+ last if $cur == $end;
+ my $tn = $cur->get_termname;
+ $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx);
+ }
+ wantarray ? sort(keys(%ret)) : \%ret;
}
1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f553eda6..65764cc8 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -435,8 +435,9 @@ sub add_xapian ($$$$) {
if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) {
my @x = @VMD_MAP;
while (my ($field, $pfx) = splice(@x, 0, 2)) {
- my $vals = xap_terms($pfx, $old);
- $doc->add_boolean_term($pfx.$_) for keys %$vals;
+ for my $term (xap_terms($pfx, $old)) {
+ $doc->add_boolean_term($pfx.$term);
+ }
}
}
$self->{xdb}->replace_document($smsg->{num}, $doc);
diff --git a/t/lei_store.t b/t/lei_store.t
index db94f6da..73b5c74d 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -31,7 +31,8 @@ $sto->done;
is($mset->size, 1, 'search works');
is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
'mset_to_artnums');
- my @kw = $es->msg_keywords(($mset->items)[0]);
+ my $mi = ($mset->items)[0];
+ my @kw = PublicInbox::Search::xap_terms('K', $mi->get_document);
is_deeply(\@kw, [], 'no flags');
}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] www_listing: start updating for pagination + search
2021-06-23 11:14 [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14 ` [PATCH 1/3] search: make xap_terms easier-to-use and use it more Eric Wong
@ 2021-06-23 11:14 ` Eric Wong
2021-06-23 11:14 ` [PATCH 3/3] www: do not warn on blank query parameters Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
To: meta
When dealing with thousands of inboxes, displaying all of
them on a single page isn't going to work. So steal some
pagination and search results code from the message search
to generate some basic HTML output that looks good in w3m.
---
lib/PublicInbox/Config.pm | 5 ++
lib/PublicInbox/ManifestJsGz.pm | 2 +-
lib/PublicInbox/MiscSearch.pm | 34 +++----
lib/PublicInbox/SearchQuery.pm | 13 +--
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/WwwListing.pm | 155 +++++++++++++++++++++++++++-----
6 files changed, 164 insertions(+), 47 deletions(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 3f0f5a01..36f2fafb 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -97,6 +97,11 @@ sub lookup_ei {
$self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
}
+sub lookup_eidx_key {
+ my ($self, $eidx_key) = @_;
+ _lookup_fill($self, '-by_eidx_key', $eidx_key);
+}
+
# special case for [extindex "all"]
sub ALL { lookup_ei($_[0], 'all') }
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 31cf15dc..e7bb0e86 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -81,7 +81,7 @@ sub ibx_entry {
warn "E: $@" if $@;
}
-sub hide_key { 'manifest' }
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
# overrides WwwListing->psgi_triple
sub psgi_triple {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 4e010453..6b575b0d 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback
$eq->set_query($qr);
my $desc = !$opt->{asc};
my $rel = $opt->{relevance} // 0;
- if ($rel == -1) { # ORDER BY docid/UID
+ if ($rel == -1) { # ORDER BY docid
$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
$eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
} elsif ($rel) {
@@ -132,6 +132,23 @@ sub ibx_data_once {
}
}
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+ my ($doc) = $_[-1];
+ my $d;
+ my $data = $json->decode($doc->get_data);
+ for (values %$data) {
+ $d = $_->{description} // next;
+ $d =~ s/ \[epoch [0-9]+\]\z// or next;
+ last;
+ }
+ {
+ uidvalidity => int_val($doc, $UIDVALIDITY),
+ -modified => int_val($doc, $MODIFIED),
+ # extract description from manifest.js.gz epoch description
+ description => $d
+ };
+}
+
sub inbox_data {
my ($self, $ibx) = @_;
retry_reopen($self, \&ibx_data_once, $ibx);
@@ -141,20 +158,7 @@ sub ibx_cache_load {
my ($doc, $cache) = @_;
my ($eidx_key) = xap_terms('Q', $doc);
return unless defined($eidx_key); # expired
- my $ce = $cache->{$eidx_key} = {};
- $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
- $ce->{-modified} = int_val($doc, $MODIFIED);
- $ce->{description} = do {
- # extract description from manifest.js.gz epoch description
- my $d;
- my $data = $json->decode($doc->get_data);
- for (values %$data) {
- $d = $_->{description} // next;
- $d =~ s/ \[epoch [0-9]+\]\z// or next;
- last;
- }
- $d;
- }
+ $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
}
sub _nntpd_cache_load { # retry_reopen callback
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 0f360500..a6b7d843 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,7 +1,7 @@
# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
package PublicInbox::SearchQuery;
use strict;
use v5.10.1;
@@ -32,11 +32,12 @@ sub qs_html {
if (scalar(keys(%override))) {
$self = bless { (%$self, %override) }, ref($self);
}
-
- my $q = uri_escape($self->{'q'}, MID_ESC);
- $q =~ s/%20/+/g; # improve URL readability
- my $qs = "q=$q";
-
+ my $qs = '';
+ if (defined(my $q = $self->{'q'})) {
+ $q = uri_escape($q, MID_ESC);
+ $q =~ s/%20/+/g; # improve URL readability
+ $qs .= "q=$q";
+ }
if (my $o = $self->{o}) { # ignore o == 0
$qs .= "&o=$o";
}
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e13359d5..c0c801b3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -219,7 +219,7 @@ sub search_nav_top {
$rv .= qq{</pre></form><pre>};
}
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
my ($mset, $q) = @_;
my $total = $mset->get_matches_estimated;
my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
# Used by PublicInbox::WWW
package PublicInbox::WwwListing;
use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
use PublicInbox::Linkify;
use PublicInbox::GzipFilter qw(gzf_maybe);
use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
use bytes (); # bytes::length
sub ibx_entry {
- my ($ctx, $ibx) = @_;
- my $mtime = $ibx->modified;
- my $ts = fmt_ts($mtime);
+ my ($ctx, $ibx, $ce) = @_;
+ $ce->{description} //= $ibx->description;
+ my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
my $url = prurl($ctx->{env}, $ibx->{url});
my $tmp = <<"";
* $ts - $url
- ${\$ibx->description}
+ $ce->{description}
if (defined(my $info_url = $ibx->{infourl})) {
$tmp .= ' ' . prurl($ctx->{env}, $info_url) . "\n";
}
- push @{$ctx->{-list}}, [ $mtime, $tmp ];
+ push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+ $tmp : [ $ce->{-modified}, $tmp ] ));
}
sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
}
}
-sub url_regexp {
+sub url_filter {
my ($ctx, $key, $default) = @_;
$key //= 'publicInbox.wwwListing';
$default //= '404';
@@ -50,9 +51,9 @@ again:
if ($v eq 'match=domain') {
my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
$h =~ s/:[0-9]+\z//;
- qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+ (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
} elsif ($v eq 'all') {
- qr/./;
+ (qr/./, undef);
} elsif ($v eq '404') {
undef;
} else {
@@ -67,22 +68,122 @@ EOF
sub hide_key { 'www' }
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+ my ($misc, $ctx, $re, $qs) = @_;
+ require PublicInbox::SearchQuery;
+ my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+ my $o = $q->{o};
+ my ($asc, $min, $max);
+ if ($o < 0) {
+ $asc = 1;
+ $o = -($o + 1); # so [-1] is the last element, like Perl lists
+ }
+ my $r = $q->{r};
+ my $opt = {
+ offset => $o,
+ asc => $asc,
+ relevance => $r,
+ limit => $q->{l}
+ };
+ $qs .= ' type:inbox';
+ if (my $user_query = $q->{'q'}) {
+ $qs = "( $qs ) AND ( $user_query )";
+ }
+ my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+ $ctx->{-list} = [];
+ my $pi_cfg = $ctx->{www}->{pi_cfg};
+ for my $mi ($mset->items) {
+ my $doc = $mi->get_document;
+ my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+ $eidx_key // next;
+ my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+ next if $ibx->{-hide}->{$ctx->hide_key};
+ grep(/$re/, @{$ibx->{url}}) or next;
+ $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+ if ($r) { # for descriptions in search_nav_bot
+ my $pct = PublicInbox::Search::get_pct($mi);
+ # only when sorting by relevance, ->items is always
+ # ordered descending:
+ $max //= $pct;
+ $min = $pct;
+ }
+ }
+ if ($r) { # for descriptions in search_nav_bot
+ $q->{-min_pct} = $min;
+ $q->{-max_pct} = $max;
+ }
+ $ctx->{-mset} = $mset;
+ psgi_triple($ctx);
+}
+
sub response {
my ($class, $ctx) = @_;
bless $ctx, $class;
- if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
- $ALL->misc->reopen;
- }
- my $re = $ctx->url_regexp or return $ctx->psgi_triple;
- my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+ my ($re, $qs) = $ctx->url_filter;
+ $re // return $ctx->psgi_triple;
+ if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+ $ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+ $ctx, $re, $qs);
+ } else { # slow path, no [extindex "all"] configured
+ my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
\&list_match_i, $re, $ctx);
- sub {
- $ctx->{-wcb} = $_[0]; # HTTP server callback
- $ctx->{env}->{'pi-httpd.async'} ?
- $iter->event_step : $iter->each_section;
+ sub {
+ $ctx->{-wcb} = $_[0]; # HTTP server callback
+ $ctx->{env}->{'pi-httpd.async'} ?
+ $iter->event_step : $iter->each_section;
+ }
}
}
+sub mset_footer ($$) {
+ my ($ctx, $mset) = @_;
+ # no footer if too few matches
+ return '' if $mset->get_matches_estimated == $mset->size;
+ require PublicInbox::SearchView;
+ PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+ my ($ctx, $mset) = @_;
+ my $q = $ctx->{-sq};
+ my $qh = $q->{'q'} // '';
+ utf8::decode($qh);
+ $qh = ascii_html($qh);
+ $qh = qq[\nvalue="$qh"] if $qh ne '';
+ my $rv = <<EOM;
+<form
+action="./"><pre><input
+name=q
+type=text$qh /><input
+type=submit
+value="locate inbox" /></pre></form><pre>
+EOM
+ chomp $rv;
+ if (defined($q->{'q'})) {
+ my $initial_q = $ctx->{-uxs_retried};
+ if (defined $initial_q) {
+ my $rewritten = $q->{'q'};
+ utf8::decode($initial_q);
+ utf8::decode($rewritten);
+ $initial_q = ascii_html($initial_q);
+ $rewritten = ascii_html($rewritten);
+ $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+ $rv .= " returned no results, used:\n";
+ $rv .= " <b>$rewritten</b>\n instead\n\n";
+ }
+ $rv .= 'Search results ordered by [';
+ if ($q->{r}) {
+ my $d = $q->qs_html(r => 0);
+ $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+ } else {
+ my $d = $q->qs_html(r => 1);
+ $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+ }
+ $rv .= ']';
+ }
+ $rv .= qq{</pre>};
+}
+
sub psgi_triple {
my ($ctx) = @_;
my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
@@ -90,17 +191,23 @@ sub psgi_triple {
my $gzf = gzf_maybe($h, $ctx->{env});
$gzf->zmore('<html><head><title>' .
'public-inbox listing</title>' .
- '</head><body><pre>');
+ '</head><body>');
my $code = 404;
- if (my $list = $ctx->{-list}) {
+ if (my $list = delete $ctx->{-list}) {
+ my $mset = delete $ctx->{-mset};
$code = 200;
- # sort by ->modified
- @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+ if ($mset) { # already sorted, so search bar:
+ $gzf->zmore(mset_nav_top($ctx, $mset));
+ } else { # sort config dump by ->modified
+ @$list = map { $_->[1] }
+ sort { $b->[0] <=> $a->[0] } @$list;
+ }
$list = join("\n", @$list);
my $l = PublicInbox::Linkify->new;
- $gzf->zmore($l->to_html($list));
+ $gzf->zmore('<pre>'.$l->to_html($list));
+ $gzf->zmore(mset_footer($ctx, $mset)) if $mset;
} else {
- $gzf->zmore('no inboxes, yet');
+ $gzf->zmore('<pre>no inboxes, yet');
}
my $out = $gzf->zflush('</pre><hr><pre>'.
PublicInbox::WwwStream::code_footer($ctx->{env}) .
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] www: do not warn on blank query parameters
2021-06-23 11:14 [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14 ` [PATCH 1/3] search: make xap_terms easier-to-use and use it more Eric Wong
2021-06-23 11:14 ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
@ 2021-06-23 11:14 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
To: meta
Sometimes users (or bots) may lead queries with '&' and
trigger uninitialized variable warnings; just ignore them
and give consumers a $ctx->{qp}->{''} entry.
While we're in the area, pass a regexp rather than scalar string
to the `split' perlop to prevent Perl from recompiling the
regexp on every call.
---
lib/PublicInbox/WWW.pm | 5 ++---
t/psgi_search.t | 4 ++++
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 8f4bfd0f..841a7e85 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -50,10 +50,9 @@ sub call {
%{$ctx->{qp}} = map {
utf8::decode($_);
tr/+/ /;
- my ($k, $v) = split('=', $_, 2);
- $v = uri_unescape($v // '');
+ my ($k, $v) = split(/=/, $_, 2);
# none of the keys we care about will need escaping
- $k => $v;
+ ($k // '', uri_unescape($v // ''))
} split(/[&;]+/, $env->{QUERY_STRING});
my $path_info = path_info_raw($env);
diff --git a/t/psgi_search.t b/t/psgi_search.t
index d59e439b..5bdd66ed 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -88,6 +88,10 @@ test_psgi(sub { $www->call(@_) }, sub {
is($res->code, 200, 'successful search result');
is_deeply([], $warn, 'no warnings from non-numeric comparison');
+ $res = $cb->(GET('/test/?&q=s:test'));
+ is($res->code, 200, 'successful search result');
+ is_deeply([], $warn, 'no warnings from black parameter');
+
$res = $cb->(POST('/test/?q=s:bogus&x=m'));
is($res->code, 404, 'failed search result gives 404');
is_deeply([], $warn, 'no warnings');
^ permalink raw reply related [flat|nested] 4+ messages in thread