* [PATCH 2/6] *search: favor wantarray form of xap_terms
2023-12-08 3:54 7% [PATCH 0/6] cindex join stuff Eric Wong
@ 2023-12-08 3:54 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-12-08 3:54 UTC (permalink / raw)
To: meta
Most xap_terms callers do not benefit from the hashref
return value, so use the list (wantarray) form instead and
defer hash-based deduplication to List::Util::uniqstr, and
only where it is actually needed.
---
lib/PublicInbox/CodeSearch.pm | 15 ++++++---------
lib/PublicInbox/LeiSearch.pm | 17 +++++++----------
lib/PublicInbox/LeiStore.pm | 13 +++++++------
3 files changed, 20 insertions(+), 25 deletions(-)
diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index 3092718d..48697cdc 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -9,6 +9,7 @@ use v5.12;
use parent qw(PublicInbox::Search);
use PublicInbox::Config;
use PublicInbox::Search qw(retry_reopen int_val xap_terms);
+use PublicInbox::Compat qw(uniqstr);
use Compress::Zlib qw(uncompress);
use constant {
AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail)
@@ -199,12 +200,11 @@ sub roots2paths { # for diagnostics
do {
my $mset = $enq->get_mset($off += $size, $lim);
for my $x ($mset->items) {
- my $tmp = xap_terms('P', $x->get_document);
- push @$dirs, keys %$tmp;
+ push @$dirs, xap_terms('P', $x->get_document);
}
$size = $mset->size;
} while ($size);
- @$dirs = sort @$dirs;
+ @$dirs = sort(uniqstr(@$dirs));
}
\%ret;
}
@@ -223,12 +223,9 @@ sub root_oids ($$) {
my @ids = docids_of_git_dir $self, $git_dir or warn <<"";
BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
- my %ret;
- for my $docid (@ids) {
- my @oids = xap_terms('G', $self->xdb, $docid);
- @ret{@oids} = @oids;
- }
- sort keys %ret;
+ my @ret = map { xap_terms('G', $self->xdb, $_) } @ids;
+ @ret = uniqstr(@ret) if @ids > 1;
+ @ret;
}
sub paths2roots {
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index ba4c4309..29e3213f 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -9,6 +9,7 @@ use parent qw(PublicInbox::ExtSearch); # PublicInbox::Search->reopen
use PublicInbox::Search qw(xap_terms);
use PublicInbox::ContentHash qw(content_digest content_hash git_sha);
use PublicInbox::MID qw(mids mids_for_index);
+use PublicInbox::Compat qw(uniqstr);
use Carp qw(croak);
sub _msg_kw { # retry_reopen callback
@@ -44,20 +45,16 @@ sub oidbin_keywords {
sub _xsmsg_vmd { # retry_reopen
my ($self, $smsg, $want_label) = @_;
my $xdb = $self->xdb; # set {nshard};
- my (%kw, %L, $doc, $x);
- $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
+ my (@kw, @L, $doc, $x);
+ @kw = qw(flagged) if delete($smsg->{lei_q_tt_flagged});
my @num = $self->over->blob_exists($smsg->{blob});
for my $num (@num) { # there should only be one...
$doc = $xdb->get_document($self->num2docid($num));
- $x = xap_terms('K', $doc);
- %kw = (%kw, %$x);
- if ($want_label) { # JSON/JMAP only
- $x = xap_terms('L', $doc);
- %L = (%L, %$x);
- }
+ push @kw, xap_terms('K', $doc);
+ push @L, xap_terms('L', $doc) if $want_label # JSON/JMAP only
}
- $smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw));
- $smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
+ @{$smsg->{kw}} = sort(uniqstr(@kw)) if @kw;
+ @{$smsg->{L}} = uniqstr(@L) if @L;
}
# lookup keywords+labels for external messages
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index aebb85a9..a752174d 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -27,6 +27,7 @@ use PublicInbox::MDA;
use PublicInbox::Spawn qw(spawn);
use PublicInbox::MdirReader;
use PublicInbox::LeiToMail;
+use PublicInbox::Compat qw(uniqstr);
use File::Temp qw(tmpnam);
use POSIX ();
use IO::Handle (); # ->autoflush
@@ -341,15 +342,15 @@ sub _add_vmd ($$$$) {
sub _docids_and_maybe_kw ($$) {
my ($self, $docids) = @_;
return $docids unless wantarray;
- my $kw = {};
+ my (@kw, $idx, @tmp);
for my $num (@$docids) { # likely only 1, unless ContentHash changes
# can't use ->search->msg_keywords on uncommitted docs
- my $idx = $self->{priv_eidx}->idx_shard($num);
- my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) };
- if ($@) { warn "#$num get_terms: $@" }
- else { @$kw{keys %$tmp} = values(%$tmp) };
+ $idx = $self->{priv_eidx}->idx_shard($num);
+ @tmp = eval { $idx->ipc_do('get_terms', 'K', $num) };
+ $@ ? warn("#$num get_terms: $@") : push(@kw, @tmp);
}
- ($docids, [ sort keys %$kw ]);
+ @kw = sort(uniqstr(@kw)) if @$docids > 1;
+ ($docids, \@kw);
}
sub _reindex_1 { # git->cat_async callback
^ permalink raw reply related [relevance 7%]
* [PATCH 0/6] cindex join stuff
@ 2023-12-08 3:54 7% Eric Wong
2023-12-08 3:54 7% ` [PATCH 2/6] *search: favor wantarray form of xap_terms Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-12-08 3:54 UTC (permalink / raw)
To: meta
Patches 1-2 are small speedups, 3-4 are developer-facing
improvements, and 5-6 ought to actually improve and
future-proof join accuracy.
Eric Wong (6):
*search: simplify handling of Xapian term iterators
*search: favor wantarray form of xap_terms
xap_helper_cxx: drop chdir usage in build
makefile: add `check-build' target
xap_helper: support term length limit
cindex: switch --join to use dfpost7 by default
Makefile.PL | 13 +++++++
lib/PublicInbox/CodeSearch.pm | 15 ++++----
lib/PublicInbox/CodeSearchIdx.pm | 18 +++++-----
lib/PublicInbox/LeiInspect.pm | 1 -
lib/PublicInbox/LeiSearch.pm | 17 ++++-----
lib/PublicInbox/LeiStore.pm | 13 +++----
lib/PublicInbox/Search.pm | 19 +++++-----
lib/PublicInbox/SearchIdx.pm | 13 ++++---
lib/PublicInbox/XapHelper.pm | 24 ++++++++++---
lib/PublicInbox/XapHelperCxx.pm | 19 ++++------
lib/PublicInbox/xap_helper.h | 11 +++++-
lib/PublicInbox/xh_cidx.h | 61 ++++++++++++++++++++++++--------
lib/PublicInbox/xh_mset.h | 2 +-
t/xap_helper.t | 33 +++++++++++++++++
14 files changed, 177 insertions(+), 82 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-12-08 3:54 7% [PATCH 0/6] cindex join stuff Eric Wong
2023-12-08 3:54 7% ` [PATCH 2/6] *search: favor wantarray form of xap_terms Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).