From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 4/6] extindex: share unref logic in more places Date: Mon, 11 Oct 2021 08:06:18 +0000 [thread overview] Message-ID: <20211011080620.27478-5-e@80x24.org> (raw) In-Reply-To: <20211011080620.27478-1-e@80x24.org> We can use the same logic for --gc and --reindex and 'd' log entries They're similar enough and the actual need to unref should be fairly rare. We could go a lot faster if we didn't show progress for --gc and --reindex, actually. --- lib/PublicInbox/ExtSearchIdx.pm | 102 ++++++++++++-------------------- 1 file changed, 38 insertions(+), 64 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 64cd8641585d..c0fd282358f9 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -129,32 +129,46 @@ sub apply_boost ($$) { $req->{self}->{oidx}->add_overview($req->{eml}, $new_smsg); } +sub _unref_doc ($$$$$;$) { + my ($sync, $docid, $ibx, $xnum, $oidbin, $eml) = @_; + my $s = 'DELETE FROM xref3 WHERE ibx_id = ? AND oidbin = ?'; + $s .= ' AND xnum = ?' 
if defined($xnum); + my $del = $sync->{self}->{oidx}->dbh->prepare_cached($s); + $del->bind_param(1, $ibx->{-ibx_id}); + $del->bind_param(2, $oidbin, SQL_BLOB); + $del->bind_param(3, $xnum) if defined($xnum); + $del->execute; + my $xr3 = $sync->{self}->{oidx}->get_xref3($docid, 1); + my $idx = $sync->{self}->idx_shard($docid); + if (scalar(@$xr3) == 0) { # all gone + $sync->{self}->{oidx}->delete_by_num($docid); + $sync->{self}->{oidx}->eidxq_del($docid); + $idx->ipc_do('xdb_remove', $docid); + } else { # enqueue for reindex of remaining messages + my $ekey = $ibx->{-gc_eidx_key} // $ibx->eidx_key; + $idx->ipc_do('remove_eidx_info', $docid, $ekey, $eml); + $sync->{self}->{oidx}->eidxq_add($docid); # yes, add + } + @$xr3 +} + sub do_xpost ($$) { my ($req, $smsg) = @_; my $self = $req->{self}; my $docid = $smsg->{num}; - my $idx = $self->idx_shard($docid); my $oid = $req->{oid}; my $xibx = $req->{ibx}; my $eml = $req->{eml}; - my $eidx_key = $xibx->eidx_key; if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message + my $eidx_key = $xibx->eidx_key; my $xnum = $req->{xnum}; $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key); + my $idx = $self->idx_shard($docid); $idx->ipc_do('add_eidx_info', $docid, $eidx_key, $eml); apply_boost($req, $smsg) if $req->{boost_in_use}; - } else { # 'd' - my $rm_eidx_info; - my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key, - \$rm_eidx_info); - if ($nr == 0) { - $self->{oidx}->eidxq_del($docid); - $idx->ipc_do('xdb_remove', $docid); - } elsif ($rm_eidx_info) { - $idx->ipc_do('remove_eidx_info', - $docid, $eidx_key, $eml); - $self->{oidx}->eidxq_add($docid); # yes, add - } + } else { # 'd' no {xnum} + $oid = pack('H*', $oid); + _unref_doc($req, $docid, $xibx, undef, $oid, $eml); } } @@ -345,36 +359,12 @@ sub _sync_inbox ($$$) { undef; } -sub gc_unref_doc ($$$$) { - my ($self, $ibx_id, $eidx_key, $docid) = @_; - my $remain = 0; - # for debug/info purposes, oids may no longer be accessible - my $dbh = 
$self->{oidx}->dbh; - my $sth = $dbh->prepare_cached(<<'', undef, 1); -SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ? - - $sth->execute($docid, $ibx_id); - my @oid = map { unpack('H*', $_->[0]) } @{$sth->fetchall_arrayref}; - for my $oid (@oid) { - $remain += $self->{oidx}->remove_xref3($docid, $oid, $eidx_key); - } - if ($remain) { - $self->{oidx}->eidxq_add($docid); # enqueue for reindex - for my $oid (@oid) { - warn "I: unref #$docid $eidx_key $oid\n"; - } - } else { - warn "I: remove #$docid $eidx_key @oid\n"; - $self->idx_shard($docid)->ipc_do('xdb_remove', $docid); - } -} - sub eidx_gc_scan_inboxes ($$) { my ($self, $sync) = @_; my ($x3_doc, $ibx_ck); restart: $x3_doc = $self->{oidx}->dbh->prepare(<<EOM); -SELECT docid FROM xref3 WHERE ibx_id = ? +SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ? EOM $ibx_ck = $self->{oidx}->dbh->prepare(<<EOM); SELECT ibx_id,eidx_key FROM inboxes @@ -385,8 +375,12 @@ EOM $self->{midx}->remove_eidx_key($eidx_key); warn "I: deleting messages for $eidx_key...\n"; $x3_doc->execute($ibx_id); - while (defined(my $docid = $x3_doc->fetchrow_array)) { - gc_unref_doc($self, $ibx_id, $eidx_key, $docid); + my $ibx = { -ibx_id => $ibx_id, -gc_eidx_key => $eidx_key }; + while (my ($docid, $xnum, $oid) = $x3_doc->fetchrow_array) { + my $r = _unref_doc($sync, $docid, $ibx, $xnum, $oid); + $oid = unpack('H*', $oid); + $r = $r ? 
'unref' : 'remove'; + warn "I: $r #$docid $eidx_key $oid\n"; if (checkpoint_due($sync)) { $x3_doc = $ibx_ck = undef; reindex_checkpoint($self, $sync); @@ -470,6 +464,7 @@ sub eidx_gc { next_check => now() + 10, checkpoint_unlocks => 1, -opt => $opt, + self => $self, }; $self->idx_init($opt); # acquire lock via V2Writable::_idx_init eidx_gc_scan_inboxes($self, $sync); @@ -807,27 +802,6 @@ sub reindex_unseen ($$$$) { $self->git->cat_async($xsmsg->{blob}, \&_reindex_unseen, $req); } -sub _unref_stale ($$$$$) { - my ($sync, $docid, $ibx, $xnum, $oidbin) = @_; - my $del = $sync->{self}->{oidx}->dbh->prepare_cached(<<''); -DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ? - - $del->bind_param(1, $ibx->{-ibx_id}); - $del->bind_param(2, $xnum); - $del->bind_param(3, $oidbin, SQL_BLOB); - $del->execute; - my $xr3 = $sync->{self}->{oidx}->get_xref3($docid, 1); - my $idx = $sync->{self}->idx_shard($docid); - if (scalar(@$xr3) == 0) { # all gone - $sync->{self}->{oidx}->delete_by_num($docid); - $sync->{self}->{oidx}->eidxq_del($docid); - $idx->ipc_do('xdb_remove', $docid); - } else { # enqueue for reindex of remaining messages - $idx->ipc_do('remove_eidx_info', $docid, $ibx->eidx_key); - $sync->{self}->{oidx}->eidxq_add($docid); # yes, add - } -} - sub _unref_stale_range ($$$) { my ($sync, $ibx, $lt_or_gt) = @_; my $r; @@ -843,7 +817,7 @@ EOS my ($docid, $xnum, $oidbin) = @$_; my $hex = unpack('H*', $oidbin); warn("# $xnum:$hex (#$docid): stale\n"); - _unref_stale($sync, $docid, $ibx, $xnum, $oidbin); + _unref_doc($sync, $docid, $ibx, $xnum, $oidbin); } } while (scalar(@$r) == $lim); 1; @@ -913,7 +887,7 @@ ibx_id = ? AND xnum >= ? AND xnum <= ? my $m = defined($exp) ? "mismatch (!= $exp)" : 'stale'; warn("# $xnum:$hex (#@$docids): $m\n"); for my $i (@$docids) { - _unref_stale($sync, $i, $ibx, $xnum, $bin); + _unref_doc($sync, $i, $ibx, $xnum, $bin); } } }
next prev parent reply other threads:[~2021-10-11 8:06 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-10-11 8:06 [PATCH 0/6] extindex: --reindex --fast gets faster Eric Wong 2021-10-11 8:06 ` [PATCH 1/6] extindex: speed up --reindex --fast Eric Wong 2021-10-11 8:06 ` [PATCH 2/6] sqlite: PRAGMA optimize on close Eric Wong 2021-10-11 8:06 ` [PATCH 3/6] extindex: rename var: active => active_shards Eric Wong 2021-10-11 8:06 ` Eric Wong [this message] 2021-10-11 8:06 ` [PATCH 5/6] extindex: more consistent doc removal Eric Wong 2021-10-11 8:06 ` [PATCH 6/6] extindex: avoid invalid blobs after unref Eric Wong
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style List information: https://public-inbox.org/README * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20211011080620.27478-5-e@80x24.org \ --to=e@80x24.org \ --cc=meta@public-inbox.org \ --subject='Re: [PATCH 4/6] extindex: share unref logic in more places' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Code repositories for project(s) associated with this inbox: https://80x24.org/public-inbox.git This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).