user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
 Warning: Initial query:
 %22lei/store: cull redundant docids based on blob OID%22
 returned no results, used:
 "lei/store: cull redundant docids based on blob OID"
 instead

Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] lei/store: do not put NULL into over.num column
@ 2021-06-18 19:20  7% Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2021-06-18 19:20 UTC (permalink / raw)
  To: meta

Simplify oid2docid and filter out undefined docids in ->add_eml
instead.  This avoids SQLite "datatype mismatch" errors in
OverIdx->add_over.

Fixes: d1052f03ea85d4af ("lei/store: cull redundant docids based on blob OID")
---
 lib/PublicInbox/LeiStore.pm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 4ba1e647..e26b622d 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -269,7 +269,7 @@ W: $oid indexed as multiple docids: $docid @cull, culling to fixup old bugs
 EOF
 		remove_docids($self, @cull);
 	}
-	wantarray ? ($docid) : $docid;
+	$docid;
 }
 
 sub add_eml {
@@ -292,8 +292,8 @@ sub add_eml {
 		if (scalar keys %$xoids) {
 			my %docids = map { $_ => 1 } @$vivify_xvmd;
 			for my $oid (keys %$xoids) {
-				my @id = oid2docid($self, $oid);
-				@docids{@id} = @id;
+				my $docid = oid2docid($self, $oid);
+				$docids{$docid} = $docid if defined($docid);
 			}
 			@$vivify_xvmd = sort { $a <=> $b } keys(%docids);
 		}

^ permalink raw reply related	[relevance 7%]

* [PATCH 3/3] lei/store: cull redundant docids based on blob OID
  2021-06-17 22:00  6% [PATCH 0/3] lei: internal bug fixups Eric Wong
@ 2021-06-17 22:00  6% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2021-06-17 22:00 UTC (permalink / raw)
  To: meta

I'm not sure how this happened (only once for me in March), but
it should not happen...  In any case, we'll operate on the
lowest-numbered docid and cull redundant index entries when
lei/store is open for read-write.

This also fixes the normal lei/store removal path to clean up
the xref3 table (since it's not done automatically for
public-facing -eidx due to the multi-list nature of it).
---
 lib/PublicInbox/LeiStore.pm  | 54 +++++++++++++++++++++++-------------
 lib/PublicInbox/SearchIdx.pm |  2 +-
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index f978288a..4ba1e647 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -226,6 +226,18 @@ sub _remove_if_local { # git->cat_async arg
 	$self->{im}->remove($bref) if $bref;
 }
 
+sub remove_docids ($;@) {
+	my ($self, @docids) = @_;
+	my $eidx = eidx_init($self);
+	for my $docid (@docids) {
+		$eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+		$self->{oidx}->delete_by_num($docid);
+		$self->{oidx}->{dbh}->do(<<EOF, undef, $docid);
+DELETE FROM xref3 WHERE docid = ?
+EOF
+	}
+}
+
 # remove the entire message from the index, does not touch mail_sync.sqlite3
 sub remove_eml {
 	my ($self, $eml) = @_;
@@ -241,13 +253,25 @@ sub remove_eml {
 			my $oidhex = unpack('H*', $oidbin);
 			$git->cat_async($oidhex, \&_remove_if_local, $self);
 		}
-		$eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid);
-		$oidx->delete_by_num($docid);
 	}
 	$git->cat_async_wait;
+	remove_docids($self, @docids);
 	\@docids;
 }
 
+sub oid2docid ($$) {
+	my ($self, $oid) = @_;
+	my $eidx = eidx_init($self);
+	my ($docid, @cull) = $eidx->{oidx}->blob_exists($oid);
+	if (@cull) { # fixup old bugs...
+		warn <<EOF;
+W: $oid indexed as multiple docids: $docid @cull, culling to fixup old bugs
+EOF
+		remove_docids($self, @cull);
+	}
+	wantarray ? ($docid) : $docid;
+}
+
 sub add_eml {
 	my ($self, $eml, $vmd, $xoids) = @_;
 	my $im = $self->{-fake_im} // $self->importer; # may create new epoch
@@ -268,7 +292,7 @@ sub add_eml {
 		if (scalar keys %$xoids) {
 			my %docids = map { $_ => 1 } @$vivify_xvmd;
 			for my $oid (keys %$xoids) {
-				my @id = $oidx->blob_exists($oid);
+				my @id = oid2docid($self, $oid);
 				@docids{@id} = @id;
 			}
 			@$vivify_xvmd = sort { $a <=> $b } keys(%docids);
@@ -356,15 +380,11 @@ sub update_xvmd {
 	my $oidx = $eidx->{oidx};
 	my %seen;
 	for my $oid (keys %$xoids) {
-		my @docids = $oidx->blob_exists($oid) or next;
-		scalar(@docids) > 1 and
-			warn "W: $oid indexed as multiple docids: @docids\n";
-		for my $docid (@docids) {
-			next if $seen{$docid}++;
-			my $idx = $eidx->idx_shard($docid);
-			$idx->ipc_do('update_vmd', $docid, $vmd_mod);
-		}
+		my $docid = oid2docid($self, $oid) // next;
 		delete $xoids->{$oid};
+		next if $seen{$docid}++;
+		my $idx = $eidx->idx_shard($docid);
+		$idx->ipc_do('update_vmd', $docid, $vmd_mod);
 	}
 	return unless scalar(keys(%$xoids));
 
@@ -395,15 +415,11 @@ sub set_xvmd {
 
 	# see if we can just update existing docs
 	for my $oid (keys %$xoids) {
-		my @docids = $oidx->blob_exists($oid) or next;
-		scalar(@docids) > 1 and
-			warn "W: $oid indexed as multiple docids: @docids\n";
-		for my $docid (@docids) {
-			next if $seen{$docid}++;
-			my $idx = $eidx->idx_shard($docid);
-			$idx->ipc_do('set_vmd', $docid, $vmd);
-		}
+		my $docid = oid2docid($self, $oid) // next;
 		delete $xoids->{$oid}; # all done with this oid
+		next if $seen{$docid}++;
+		my $idx = $eidx->idx_shard($docid);
+		$idx->ipc_do('set_vmd', $docid, $vmd);
 	}
 	return unless scalar(keys(%$xoids));
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f066cc92..f553eda6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -572,7 +572,7 @@ sub apply_vmd_mod ($$) {
 	my $updated = 0;
 	my @x = @VMD_MAP;
 	while (my ($field, $pfx) = splice(@x, 0, 2)) {
-		# field: "label" or "kw"
+		# field: "L" or "kw"
 		for my $val (@{$vmd_mod->{"-$field"} // []}) {
 			eval {
 				$doc->remove_term($pfx . $val);

^ permalink raw reply related	[relevance 6%]

* [PATCH 0/3] lei: internal bug fixups
@ 2021-06-17 22:00  6% Eric Wong
  2021-06-17 22:00  6% ` [PATCH 3/3] lei/store: cull redundant docids based on blob OID Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2021-06-17 22:00 UTC (permalink / raw)
  To: meta

Still chasing some oddness in day-to-day usage, but I think
3/3 is safe (1/3 helped me inspect things).

Eric Wong (3):
  lei inspect: learn "num:" and "docid:" prefixes
  lei_input: prefix bare Maildir paths w/ "maildir:"
  lei/store: cull redundant docids based on blob OID

 lib/PublicInbox/LeiInput.pm   |  3 +-
 lib/PublicInbox/LeiInspect.pm | 73 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiStore.pm   | 54 +++++++++++++++++---------
 lib/PublicInbox/SearchIdx.pm  |  2 +-
 4 files changed, 111 insertions(+), 21 deletions(-)

^ permalink raw reply	[relevance 6%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-06-17 22:00  6% [PATCH 0/3] lei: internal bug fixups Eric Wong
2021-06-17 22:00  6% ` [PATCH 3/3] lei/store: cull redundant docids based on blob OID Eric Wong
2021-06-18 19:20  7% [PATCH] lei/store: do not put NULL into over.num column Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).