user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 00/11] cleanups, mostly indexing related
@ 2020-09-02 11:04  7% Eric Wong
  2020-09-02 11:04  5% ` [PATCH 02/11] disambiguate OverIdx and Over by field name Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-09-02 11:04 UTC (permalink / raw)
  To: meta

Some cleanups ahead of detached index support.

Found some dead code, too.

Eric Wong (11):
  msgmap: note how we use ->created_at
  disambiguate OverIdx and Over by field name
  use more idiomatic internal API for ->over access
  search: remove special case for blank query
  tests: add "use strict" and declare v5.10.1 compatibility
  search: replace ->query with ->mset
  search: remove {over_ro} field
  imap: drop old, pre-Parse::RecDescent search parser
  wwwaltid: drop unused sqlite3_missing function
  overidx: document column uses
  v2writable: reuse read-only shard counting code

 lib/PublicInbox/ExtMsg.pm     |   4 +-
 lib/PublicInbox/IMAP.pm       |  63 +----------------
 lib/PublicInbox/Inbox.pm      |  11 ++-
 lib/PublicInbox/Mbox.pm       |   6 +-
 lib/PublicInbox/Msgmap.pm     |   1 +
 lib/PublicInbox/OverIdx.pm    |  18 ++---
 lib/PublicInbox/Search.pm     |  32 ++++-----
 lib/PublicInbox/SearchIdx.pm  |  32 ++++-----
 lib/PublicInbox/SearchView.pm |   3 +-
 lib/PublicInbox/SolverGit.pm  |   5 +-
 lib/PublicInbox/V2Writable.pm |  59 ++++++----------
 lib/PublicInbox/WwwAltId.pm   |  16 +----
 scripts/dupe-finder           |   3 +-
 t/altid.t                     |   8 +--
 t/altid_v2.t                  |   7 +-
 t/index-git-times.t           |  17 +++--
 t/indexlevels-mirror.t        |   8 +--
 t/mda_filter_rubylang.t       |   6 +-
 t/replace.t                   |   8 +--
 t/search-thr-index.t          |   8 +--
 t/search.t                    | 126 +++++++++++++++++-----------------
 t/v1reindex.t                 |   4 +-
 t/v2mda.t                     |  16 +++--
 t/v2mirror.t                  |  24 +++----
 t/v2reindex.t                 |   9 +--
 t/v2writable.t                |  14 ++--
 t/watch_filter_rubylang.t     |  12 ++--
 t/watch_maildir_v2.t          |  30 ++++----
 t/xcpdb-reshard.t             |   3 +-
 xt/eml_check_limits.t         |   2 +
 xt/perf-threading.t           |   2 +-
 31 files changed, 232 insertions(+), 325 deletions(-)


^ permalink raw reply	[relevance 7%]

* [PATCH 02/11] disambiguate OverIdx and Over by field name
  2020-09-02 11:04  7% [PATCH 00/11] cleanups, mostly indexing related Eric Wong
@ 2020-09-02 11:04  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-09-02 11:04 UTC (permalink / raw)
  To: meta

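We'll use {oidx} as the common field name for the read-write
OverIdx object, to disambiguate it from the read-only {over}
field.  This hopefully makes it clearer which code paths are
read-only and which are read-write.

While at it, unindex_eml no longer wraps ->remove_oid in an
eval: a failure there now propagates instead of being reduced
to a "failed to remove <$mid> from overview" warning.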
---
 lib/PublicInbox/SearchIdx.pm  | 32 ++++++++++++++-----------------
 lib/PublicInbox/V2Writable.pm | 36 +++++++++++++++++------------------
 t/search-thr-index.t          |  8 ++++----
 t/search.t                    |  2 +-
 4 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 3f2da6ab..eb620f44 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -69,8 +69,8 @@ sub new {
 	if ($version == 1) {
 		$self->{lock_path} = "$inboxdir/ssoma.lock";
 		my $dir = $self->xdir;
-		$self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
-		$self->{over}->{-no_fsync} = 1 if $ibx->{-no_fsync};
+		$self->{oidx} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+		$self->{oidx}->{-no_fsync} = 1 if $ibx->{-no_fsync};
 	} elsif ($version == 2) {
 		defined $shard or die "shard is required for v2\n";
 		# shard is a number
@@ -419,8 +419,8 @@ sub add_message {
 		# of the fields which exist in over.sqlite3.  We may stop
 		# storing doc_data in Xapian sometime after we get multi-inbox
 		# search working.
-		if (my $over = $self->{over}) { # v1 only
-			$over->add_overview($mime, $smsg);
+		if (my $oidx = $self->{oidx}) { # v1 only
+			$oidx->add_overview($mime, $smsg);
 		}
 		if (need_xapian($self)) {
 			add_xapian($self, $mime, $smsg, $mids);
@@ -457,7 +457,7 @@ sub xdb_remove {
 
 sub remove_by_oid {
 	my ($self, $oid, $num) = @_;
-	die "BUG: remove_by_oid is v2-only\n" if $self->{over};
+	die "BUG: remove_by_oid is v2-only\n" if $self->{oidx};
 	$self->begin_txn_lazy;
 	xdb_remove($self, $oid, $num) if need_xapian($self);
 }
@@ -479,13 +479,9 @@ sub unindex_eml {
 	my $nr = 0;
 	my %tmp;
 	for my $mid (@$mids) {
-		my @removed = eval { $self->{over}->remove_oid($oid, $mid) };
-		if ($@) {
-			warn "E: failed to remove <$mid> from overview: $@\n";
-		} else {
-			$nr += scalar @removed;
-			$tmp{$_}++ for @removed;
-		}
+		my @removed = $self->{oidx}->remove_oid($oid, $mid);
+		$nr += scalar @removed;
+		$tmp{$_}++ for @removed;
 	}
 	if (!$nr) {
 		$mids = join('> <', @$mids);
@@ -507,9 +503,9 @@ sub index_mm {
 	my $mids = mids($mime);
 	my $mm = $self->{mm};
 	if ($sync->{reindex}) {
-		my $over = $self->{over};
+		my $oidx = $self->{oidx};
 		for my $mid (@$mids) {
-			my ($num, undef) = $over->num_mid0_for_oid($oid, $mid);
+			my ($num, undef) = $oidx->num_mid0_for_oid($oid, $mid);
 			return $num if defined $num;
 		}
 		$mm->num_for($mids->[0]) // $mm->mid_insert($mids->[0]);
@@ -603,7 +599,7 @@ sub v1_checkpoint ($$;$) {
 		}
 	}
 
-	$self->{over}->rethread_done($sync->{-opt}) if $newest; # all done
+	$self->{oidx}->rethread_done($sync->{-opt}) if $newest; # all done
 	commit_txn_lazy($self);
 	$self->{ibx}->git->cleanup;
 	my $nr = ${$sync->{nr}};
@@ -773,7 +769,7 @@ sub _index_sync {
 	my $pr = $opt->{-progress};
 	my $sync = { reindex => $opt->{reindex}, -opt => $opt };
 	my $xdb = $self->begin_txn_lazy;
-	$self->{over}->rethread_prepare($opt);
+	$self->{oidx}->rethread_prepare($opt);
 	my $mm = _msgmap_init($self);
 	if ($sync->{reindex}) {
 		my $last = $mm->last_commit;
@@ -804,7 +800,7 @@ sub DESTROY {
 sub _begin_txn {
 	my ($self) = @_;
 	my $xdb = $self->{xdb} || idx_acquire($self);
-	$self->{over}->begin_lazy if $self->{over};
+	$self->{oidx}->begin_lazy if $self->{oidx};
 	$xdb->begin_transaction if $xdb;
 	$self->{txn} = 1;
 	$xdb;
@@ -844,7 +840,7 @@ sub _commit_txn {
 		set_metadata_once($self);
 		$xdb->commit_transaction;
 	}
-	$self->{over}->commit_lazy if $self->{over};
+	$self->{oidx}->commit_lazy if $self->{oidx};
 }
 
 sub commit_txn_lazy {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 553dd839..c8334645 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -114,13 +114,13 @@ sub new {
 		total_bytes => 0,
 		current_info => '',
 		xpfx => $xpfx,
-		over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
+		oidx => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
 		lock_path => "$dir/inbox.lock",
 		# limit each git repo (epoch) to 1GB or so
 		rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR),
 		last_commit => [], # git epoch -> commit
 	};
-	$self->{over}->{-no_fsync} = 1 if $v2ibx->{-no_fsync};
+	$self->{oidx}->{-no_fsync} = 1 if $v2ibx->{-no_fsync};
 	$self->{shards} = count_shards($self) || nproc_shards($creat);
 	bless $self, $class;
 }
@@ -154,7 +154,7 @@ sub add {
 sub do_idx ($$$$) {
 	my ($self, $msgref, $mime, $smsg) = @_;
 	$smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
-	$self->{over}->add_overview($mime, $smsg);
+	$self->{oidx}->add_overview($mime, $smsg);
 	my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
 	$idx->index_raw($msgref, $mime, $smsg);
 	my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
@@ -219,7 +219,7 @@ sub v2_num_for {
 		if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) {
 			my $num = $self->{mm}->num_for($mid);
 
-			if (defined $num && !$self->{over}->get_art($num)) {
+			if (defined $num && !$self->{oidx}->get_art($num)) {
 				return ($num, $mid);
 			}
 		}
@@ -274,7 +274,7 @@ sub idx_shard {
 sub _idx_init { # with_umask callback
 	my ($self, $opt) = @_;
 	$self->lock_acquire unless $opt && $opt->{-skip_lock};
-	$self->{over}->create;
+	$self->{oidx}->create;
 
 	# xcpdb can change shard count while -watch is idle
 	my $nshards = count_shards($self);
@@ -381,7 +381,7 @@ sub rewrite_internal ($$;$$$) {
 	} else {
 		$im = $self->importer;
 	}
-	my $over = $self->{over};
+	my $oidx = $self->{oidx};
 	my $chashes = content_hashes($old_eml);
 	my $removed = [];
 	my $mids = mids($old_eml);
@@ -395,7 +395,7 @@ sub rewrite_internal ($$;$$$) {
 	foreach my $mid (@$mids) {
 		my %gone; # num => [ smsg, $mime, raw ]
 		my ($id, $prev);
-		while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
+		while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) {
 			my $msg = get_blob($self, $smsg);
 			if (!defined($msg)) {
 				warn "broken smsg for $mid\n";
@@ -623,7 +623,7 @@ sub checkpoint ($;$) {
 		$dbh->commit;
 
 		# SQLite overview is third
-		$self->{over}->commit_lazy;
+		$self->{oidx}->commit_lazy;
 
 		# Now deal with Xapian
 		if ($wait) {
@@ -682,7 +682,7 @@ sub done {
 			$err .= "shard close: $@\n" if $@;
 		}
 	}
-	eval { $self->{over}->dbh_close };
+	eval { $self->{oidx}->dbh_close };
 	$err .= "over close: $@\n" if $@;
 	delete $self->{bnote};
 	my $nbytes = $self->{total_bytes};
@@ -844,10 +844,10 @@ sub get_blob ($$) {
 
 sub content_exists ($$$) {
 	my ($self, $mime, $mid) = @_;
-	my $over = $self->{over};
+	my $oidx = $self->{oidx};
 	my $chashes = content_hashes($mime);
 	my ($id, $prev);
-	while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
+	while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) {
 		my $msg = get_blob($self, $smsg);
 		if (!defined($msg)) {
 			warn "broken smsg for $mid\n";
@@ -917,9 +917,9 @@ sub index_oid { # cat_async callback
 		}
 	}
 	if (!defined($num)) { # reuse if reindexing (or duplicates)
-		my $over = $self->{over};
+		my $oidx = $self->{oidx};
 		for my $mid (@$mids) {
-			($num, $mid0) = $over->num_mid0_for_oid($oid, $mid);
+			($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid);
 			last if defined $num;
 		}
 	}
@@ -1107,7 +1107,7 @@ sub sync_prepare ($$$) {
 
 sub unindex_oid_remote ($$$) {
 	my ($self, $oid, $mid) = @_;
-	my @removed = $self->{over}->remove_oid($oid, $mid);
+	my @removed = $self->{oidx}->remove_oid($oid, $mid);
 	for my $num (@removed) {
 		my $idx = idx_shard($self, $num % $self->{shards});
 		$idx->shard_remove($oid, $num);
@@ -1121,11 +1121,11 @@ sub unindex_oid ($$;$) { # git->cat_async callback
 	my $mm = $self->{mm};
 	my $mids = mids(PublicInbox::Eml->new($bref));
 	undef $$bref;
-	my $over = $self->{over};
+	my $oidx = $self->{oidx};
 	foreach my $mid (@$mids) {
 		my %gone;
 		my ($id, $prev);
-		while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) {
+		while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) {
 			$gone{$smsg->{num}} = 1 if $oid eq $smsg->{blob};
 		}
 		my $n = scalar(keys(%gone)) or next;
@@ -1299,7 +1299,7 @@ sub index_sync {
 
 	$self->idx_init($opt); # acquire lock
 	fill_alternates($self, $epoch_max);
-	$self->{over}->rethread_prepare($opt);
+	$self->{oidx}->rethread_prepare($opt);
 	my $sync = {
 		need_checkpoint => \(my $bool = 0),
 		unindex_range => {}, # EPOCH => oid_old..oid_new
@@ -1329,7 +1329,7 @@ sub index_sync {
 	}
 	# work forwards through history
 	index_epoch($self, $sync, $_) for (0..$epoch_max);
-	$self->{over}->rethread_done($opt);
+	$self->{oidx}->rethread_done($opt);
 	$self->done;
 
 	if (my $nr = $sync->{nr}) {
diff --git a/t/search-thr-index.t b/t/search-thr-index.t
index b5a5ff1f..bd663519 100644
--- a/t/search-thr-index.t
+++ b/t/search-thr-index.t
@@ -60,9 +60,9 @@ foreach (reverse split(/\n\n/, $data)) {
 
 my $prev;
 my %tids;
-my $dbh = $rw->{over}->dbh;
+my $dbh = $rw->{oidx}->dbh;
 foreach my $mid (@mids) {
-	my $msgs = $rw->{over}->get_thread($mid);
+	my $msgs = $rw->{oidx}->get_thread($mid);
 	is(3, scalar(@$msgs), "got all messages from $mid");
 	foreach my $m (@$msgs) {
 		my $tid = $dbh->selectrow_array(<<'', undef, $m->{num});
@@ -84,9 +84,9 @@ Message-Id: <1-bw@g>
 From: bw@g
 To: git@vger.kernel.org
 
-	my $dbh = $rw->{over}->dbh;
+	my $dbh = $rw->{oidx}->dbh;
 	my ($id, $prev);
-	my $reidx = $rw->{over}->next_by_mid('1-bw@g', \$id, \$prev);
+	my $reidx = $rw->{oidx}->next_by_mid('1-bw@g', \$id, \$prev);
 	ok(defined $reidx);
 	my $num = $reidx->{num};
 	my $tid0 = $dbh->selectrow_array(<<'', undef, $num);
diff --git a/t/search.t b/t/search.t
index e2290ecd..f026e509 100644
--- a/t/search.t
+++ b/t/search.t
@@ -161,7 +161,7 @@ are real
 EOF
 	my $ghost_id = $rw->add_message($was_ghost);
 	is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id");
-	my $msgs = $rw->{over}->get_thread('ghost-message@s');
+	my $msgs = $rw->{oidx}->get_thread('ghost-message@s');
 	is(scalar(@$msgs), 2, 'got both messages in ghost thread');
 	foreach (qw(sid tid)) {
 		is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match");

^ permalink raw reply related	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-09-02 11:04  7% [PATCH 00/11] cleanups, mostly indexing related Eric Wong
2020-09-02 11:04  5% ` [PATCH 02/11] disambiguate OverIdx and Over by field name Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).