* [PATCH 4/5] searchidx: remove $oid parameter from most calls
2020-12-07 7:40 7% [PATCH 0/5] extindex: random cleanups Eric Wong
@ 2020-12-07 7:40 5% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-12-07 7:40 UTC (permalink / raw)
To: meta
Xapian docids have been tied to the over {num} column for
nearly 3 years now, and OIDs are no longer stored in Xapian
document data. There's no need to increase code and IPC
complexity by passing the OID around.
---
lib/PublicInbox/ExtSearchIdx.pm | 15 +++++-------
lib/PublicInbox/SearchIdx.pm | 38 +++++++++++++------------------
lib/PublicInbox/SearchIdxShard.pm | 37 ++++++++++++++----------------
lib/PublicInbox/V2Writable.pm | 2 +-
4 files changed, 40 insertions(+), 52 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 819c7903..c06b25a9 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -125,17 +125,16 @@ sub do_xpost ($$) {
if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
my $xnum = $req->{xnum};
$self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
- $idx->shard_add_eidx_info($docid, $oid, $xibx, $eml);
+ $idx->shard_add_eidx_info($docid, $xibx, $eml);
check_batch_limit($req);
} else { # 'd'
my $rm_eidx_info;
my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key,
\$rm_eidx_info);
if ($nr == 0) {
- $idx->shard_remove($oid, $docid);
+ $idx->shard_remove($docid);
} elsif ($rm_eidx_info) {
- $idx->shard_remove_eidx_info($docid, $oid, $eidx_key,
- $eml);
+ $idx->shard_remove_eidx_info($docid, $eidx_key, $eml);
}
}
}
@@ -333,13 +332,11 @@ DELETE FROM xref3 WHERE docid = ? AND ibx_id = ?
if (@$remain) {
for my $oid (@oid) {
warn "I: unref #$docid $eidx_key $oid\n";
- $idx->shard_remove_eidx_info($docid, $oid, $eidx_key);
+ $idx->shard_remove_eidx_info($docid, $eidx_key);
}
} else {
- for my $oid (@oid) {
- warn "I: remove #$docid $eidx_key $oid\n";
- $idx->shard_remove($oid, $docid);
- }
+ warn "I: remove #$docid $eidx_key @oid\n";
+ $idx->shard_remove($docid);
}
}
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c18c7c36..0124dd11 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -445,20 +445,20 @@ sub add_message {
$smsg->{num};
}
-sub _get_doc ($$$) {
- my ($self, $docid, $oid) = @_;
+sub _get_doc ($$) {
+ my ($self, $docid) = @_;
my $doc = eval { $self->{xdb}->get_document($docid) };
$doc // do {
warn "E: $@\n" if $@;
- warn "E: #$docid $oid missing in Xapian\n";
+ warn "E: #$docid missing in Xapian\n";
undef;
}
}
sub add_eidx_info {
- my ($self, $docid, $oid, $eidx_key, $eml) = @_;
+ my ($self, $docid, $eidx_key, $eml) = @_;
begin_txn_lazy($self);
- my $doc = _get_doc($self, $docid, $oid) or return;
+ my $doc = _get_doc($self, $docid) or return;
term_generator($self)->set_document($doc);
$doc->add_boolean_term('O'.$eidx_key);
index_list_id($self, $doc, $eml);
@@ -466,9 +466,9 @@ sub add_eidx_info {
}
sub remove_eidx_info {
- my ($self, $docid, $oid, $eidx_key, $eml) = @_;
+ my ($self, $docid, $eidx_key, $eml) = @_;
begin_txn_lazy($self);
- my $doc = _get_doc($self, $docid, $oid) or return;
+ my $doc = _get_doc($self, $docid) or return;
eval { $doc->remove_term('O'.$eidx_key) };
warn "W: ->remove_term O$eidx_key: $@\n" if $@;
for my $l ($eml ? $eml->header_raw('List-Id') : ()) {
@@ -512,25 +512,19 @@ sub smsg_from_doc ($) {
}
sub xdb_remove {
- my ($self, $oid, @removed) = @_;
+ my ($self, @docids) = @_;
my $xdb = $self->{xdb} or return;
- for my $num (@removed) {
- my $doc = _get_doc($self, $num, $oid) or next;
- my $smsg = smsg_from_doc($doc);
- my $blob = $smsg->{blob}; # may be undef if --skip-docdata
- if (!defined($blob) || $blob eq $oid) {
- $xdb->delete_document($num);
- } else {
- warn "E: #$num $oid != $blob in Xapian\n";
- }
+ for my $docid (@docids) {
+ eval { $xdb->delete_document($docid) };
+ warn "E: #$docid not in in Xapian? $@\n" if $@;
}
}
-sub remove_by_oid {
- my ($self, $oid, $num) = @_;
- die "BUG: remove_by_oid is v2-only\n" if $self->{oidx};
+sub remove_by_docid {
+ my ($self, $num) = @_;
+ die "BUG: remove_by_docid is v2-only\n" if $self->{oidx};
$self->begin_txn_lazy;
- xdb_remove($self, $oid, $num) if need_xapian($self);
+ xdb_remove($self, $num) if need_xapian($self);
}
sub index_git_blob_id {
@@ -566,7 +560,7 @@ sub unindex_eml {
} else { # just in case msgmap and over.sqlite3 become desynched:
$self->{mm}->mid_delete($mids->[0]);
}
- xdb_remove($self, $oid, keys %tmp) if need_xapian($self);
+ xdb_remove($self, keys %tmp) if need_xapian($self);
}
sub index_mm {
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 53fac9b6..182bbde2 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -79,19 +79,16 @@ sub shard_worker_loop ($$$$$) {
# no need to lock < 512 bytes is atomic under POSIX
print $bnote "barrier $shard\n" or
die "write failed for barrier $!\n";
- } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) {
- $self->remove_by_oid($1, $2 + 0);
+ } elsif ($line =~ /\AD ([0-9]+)\n\z/s) {
+ $self->remove_by_docid($1 + 0);
} elsif ($line =~ s/\A\+X //) {
- my ($len, $docid, $oid, $eidx_key) =
- split(/ /, $line, 4);
+ my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
chomp $eidx_key;
- $self->add_eidx_info($docid, $oid, $eidx_key,
- eml($r, $len));
+ $self->add_eidx_info($docid, $eidx_key, eml($r, $len));
} elsif ($line =~ s/\A-X //) {
- my ($len, $docid, $oid, $eidx_key) =
- split(/ /, $line, 4);
+ my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
chomp $eidx_key;
- $self->remove_eidx_info($docid, $oid, $eidx_key,
+ $self->remove_eidx_info($docid, $eidx_key,
eml($r, $len));
} elsif ($line =~ s/\AO ([^\n]+)\n//) {
my $over_fn = $1;
@@ -147,27 +144,27 @@ sub index_raw {
}
sub shard_add_eidx_info {
- my ($self, $docid, $oid, $xibx, $eml) = @_;
+ my ($self, $docid, $xibx, $eml) = @_;
my $eidx_key = $xibx->eidx_key;
if (my $w = $self->{w}) {
my $hdr = $eml->header_obj->as_string;
my $len = length($hdr);
- print $w "+X $len $docid $oid $eidx_key\n", $hdr or
+ print $w "+X $len $docid $eidx_key\n", $hdr or
die "failed to write shard: $!";
} else {
- $self->add_eidx_info($docid, $oid, $eidx_key, $eml);
+ $self->add_eidx_info($docid, $eidx_key, $eml);
}
}
sub shard_remove_eidx_info {
- my ($self, $docid, $oid, $eidx_key, $eml) = @_;
+ my ($self, $docid, $eidx_key, $eml) = @_;
if (my $w = $self->{w}) {
my $hdr = $eml ? $eml->header_obj->as_string : '';
my $len = length($hdr);
- print $w "-X $len $docid $oid $eidx_key\n", $hdr or
+ print $w "-X $len $docid $eidx_key\n", $hdr or
die "failed to write shard: $!";
} else {
- $self->remove_eidx_info($docid, $oid, $eidx_key, $eml);
+ $self->remove_eidx_info($docid, $eidx_key, $eml);
}
}
@@ -208,17 +205,17 @@ sub shard_close {
}
sub shard_remove {
- my ($self, $oid, $num) = @_;
- if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
- print $w "D $oid $num\n" or die "failed to write remove $!";
+ my ($self, $num) = @_;
+ if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
+ print $w "D $num\n" or die "failed to write remove $!";
} else { # same process
- $self->remove_by_oid($oid, $num);
+ $self->remove_by_docid($num);
}
}
sub shard_over_check {
my ($self, $over) = @_;
- if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
+ if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
my ($over_fn) = $over->{dbh}->sqlite_db_filename;
$over_fn =~ tr/\n/\0/;
print $w "O $over_fn\n" or die "failed to write over $!";
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index e9a43000..5aec7561 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -1141,7 +1141,7 @@ sub unindex_oid_aux ($$$) {
my @removed = $self->{oidx}->remove_oid($oid, $mid);
for my $num (@removed) {
my $idx = idx_shard($self, $num);
- $idx->shard_remove($oid, $num);
+ $idx->shard_remove($num);
}
}
^ permalink raw reply related [relevance 5%]
* [PATCH 0/5] extindex: random cleanups
@ 2020-12-07 7:40 7% Eric Wong
2020-12-07 7:40 5% ` [PATCH 4/5] searchidx: remove $oid parameter from most calls Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-12-07 7:40 UTC (permalink / raw)
To: meta
Still working on --reindex, but found a bunch of cleanups
and tweaks which are worth doing in any case.
Eric Wong (5):
over: gracefully show invalid ibx_id
overidx: wrap eidx_key => ibx_id mapping
extsearchidx: remove needless SHA-1 check
searchidx: remove $oid parameter from most calls
shard_add_eidx_info: pass $eidx_key instead of $ibx object
lib/PublicInbox/ExtSearchIdx.pm | 27 +++++-----------------
lib/PublicInbox/Over.pm | 1 +
lib/PublicInbox/OverIdx.pm | 10 +++++---
lib/PublicInbox/SearchIdx.pm | 38 +++++++++++++------------------
lib/PublicInbox/SearchIdxShard.pm | 38 ++++++++++++++-----------------
lib/PublicInbox/V2Writable.pm | 2 +-
6 files changed, 48 insertions(+), 68 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-12-07 7:40 7% [PATCH 0/5] extindex: random cleanups Eric Wong
2020-12-07 7:40 5% ` [PATCH 4/5] searchidx: remove $oid parameter from most calls Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).