user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 08/20] use consistent {ibx} field for writable code paths
Date: Fri, 24 Jul 2020 05:55:54 +0000	[thread overview]
Message-ID: <20200724055606.27332-9-e@yhbt.net> (raw)
In-Reply-To: <20200724055606.27332-1-e@yhbt.net>

This is a step which makes our use of abbreviations more
consistent when referring to PublicInbox::Inbox objects.
We'll also be reducing the number of redundant fields
in SearchIdx and V2Writable code paths to make the
object graph easier to follow.
---
 lib/PublicInbox/Import.pm         |  6 ++--
 lib/PublicInbox/SearchIdx.pm      | 31 ++++++++++----------
 lib/PublicInbox/SearchIdxShard.pm |  6 ++--
 lib/PublicInbox/V2Writable.pm     | 47 +++++++++++++++----------------
 4 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index d565b0a03..b50c662c7 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -35,7 +35,7 @@ sub new {
 		ident => "$name <$email>",
 		mark => 1,
 		ref => $ref,
-		-inbox => $ibx,
+		ibx => $ibx,
 		path_type => '2/38', # or 'v2'
 		lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this
 		bytes_added => 0,
@@ -176,7 +176,7 @@ sub _update_git_info ($$) {
 		run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env);
 	}
 	run_die([@cmd, 'update-server-info']);
-	my $ibx = $self->{-inbox};
+	my $ibx = $self->{ibx};
 	($ibx && $self->{path_type} eq '2/38') and eval {
 		require PublicInbox::SearchIdx;
 		my $s = PublicInbox::SearchIdx->new($ibx);
@@ -385,7 +385,7 @@ sub add {
 
 	# spam check:
 	if ($check_cb) {
-		$mime = $check_cb->($mime, $self->{-inbox}) or return;
+		$mime = $check_cb->($mime, $self->{ibx}) or return;
 	}
 
 	my $blob = $self->{mark}++;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index e641ffd43..4b1b1736e 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -50,8 +50,7 @@ sub new {
 	$ibx = PublicInbox::InboxWritable->new($ibx);
 	my $self = bless {
 		inboxdir => $inboxdir,
-		-inbox => $ibx,
-		git => $ibx->git,
+		ibx => $ibx,
 		-altid => $altid,
 		ibx_ver => $version,
 		indexlevel => $indexlevel,
@@ -548,14 +547,14 @@ sub unindex_both { # git->cat_async callback
 sub index_sync {
 	my ($self, $opts) = @_;
 	delete $self->{lock_path} if $opts->{-skip_lock};
-	$self->{-inbox}->with_umask(\&_index_sync, $self, $opts);
+	$self->{ibx}->with_umask(\&_index_sync, $self, $opts);
 }
 
-sub too_big ($$$) {
-	my ($self, $git, $oid) = @_;
+sub too_big ($$) {
+	my ($self, $oid) = @_;
 	my $max_size = $self->{index_max_size} or return;
-	my (undef, undef, $size) = $git->check($oid);
-	die "E: bad $oid in $git->{git_dir}\n" if !defined($size);
+	my (undef, undef, $size) = $self->{ibx}->git->check($oid);
+	die "E: bad $oid in $self->{ibx}->{inboxdir}\n" if !defined($size);
 	return if $size <= $max_size;
 	warn "W: skipping $oid ($size > $max_size)\n";
 	1;
@@ -568,7 +567,7 @@ sub read_log {
 	my $h40 = $hex .'{40}';
 	my $addmsg = qr!^:000000 100644 \S+ ($h40) A\t${hex}{2}/${hex}{38}$!;
 	my $delmsg = qr!^:100644 000000 ($h40) \S+ D\t${hex}{2}/${hex}{38}$!;
-	my $git = $self->{git};
+	my $git = $self->{ibx}->git;
 	my $latest;
 	my $max = $BATCH_BYTES;
 	local $/ = "\n";
@@ -591,7 +590,7 @@ sub read_log {
 				}
 				next;
 			}
-			next if too_big($self, $git, $blob);
+			next if too_big($self, $blob);
 			$git->cat_async($blob, \&index_both, { %$sync });
 			if ($max <= 0) {
 				$git->cat_async_wait;
@@ -600,7 +599,7 @@ sub read_log {
 			}
 		} elsif ($line =~ /$delmsg/o) {
 			my $blob = $1;
-			$D{$blob} = 1 unless too_big($self, $git, $blob);
+			$D{$blob} = 1 unless too_big($self, $blob);
 		} elsif ($line =~ /^commit ($h40)/o) {
 			$latest = $1;
 			$newest ||= $latest;
@@ -621,7 +620,7 @@ sub read_log {
 
 sub _git_log {
 	my ($self, $opts, $range) = @_;
-	my $git = $self->{git};
+	my $git = $self->{ibx}->git;
 
 	if (index($range, '..') < 0) {
 		# don't show annoying git errors to users who run -index
@@ -681,7 +680,7 @@ sub is_ancestor ($$$) {
 
 sub need_update ($$$) {
 	my ($self, $cur, $new) = @_;
-	my $git = $self->{git};
+	my $git = $self->{ibx}->git;
 	return 1 if $cur && !is_ancestor($git, $cur, $new);
 	my $range = $cur eq '' ? $new : "$cur..$new";
 	chomp(my $n = $git->qx(qw(rev-list --count), $range));
@@ -701,7 +700,7 @@ sub _last_x_commit {
 		$lx = $lm;
 	}
 	# Use last_commit from msgmap if it is older or unset
-	if (!$lm || ($lx && $lm && is_ancestor($self->{git}, $lm, $lx))) {
+	if (!$lm || ($lx && $lm && is_ancestor($self->{ibx}->git, $lm, $lx))) {
 		$lx = $lm;
 	}
 	$lx;
@@ -718,7 +717,7 @@ sub _index_sync {
 	my ($self, $opts) = @_;
 	my $tip = $opts->{ref} || 'HEAD';
 	my ($last_commit, $lx, $xlog);
-	my $git = $self->{git};
+	my $git = $self->{ibx}->git;
 	$git->batch_prepare;
 	my $pr = $opts->{-progress};
 
@@ -830,7 +829,7 @@ sub _begin_txn {
 
 sub begin_txn_lazy {
 	my ($self) = @_;
-	$self->{-inbox}->with_umask(\&_begin_txn, $self) if !$self->{txn};
+	$self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn};
 }
 
 # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
@@ -860,7 +859,7 @@ sub _commit_txn {
 sub commit_txn_lazy {
 	my ($self) = @_;
 	delete($self->{txn}) and
-		$self->{-inbox}->with_umask(\&_commit_txn, $self);
+		$self->{ibx}->with_umask(\&_commit_txn, $self);
 }
 
 sub worker_done {
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 544268819..fd34e487b 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -11,14 +11,14 @@ use IO::Handle (); # autoflush
 use PublicInbox::Eml;
 
 sub new {
-	my ($class, $v2writable, $shard) = @_;
-	my $ibx = $v2writable->{-inbox};
+	my ($class, $v2w, $shard) = @_;
+	my $ibx = $v2w->{ibx};
 	my $self = $class->SUPER::new($ibx, 1, $shard);
 	# create the DB before forking:
 	$self->_xdb_acquire;
 	$self->set_indexlevel;
 	$self->_xdb_release;
-	$self->spawn_worker($v2writable, $shard) if $v2writable->{parallel};
+	$self->spawn_worker($v2w, $shard) if $v2w->{parallel};
 	$self;
 }
 
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 2ff2fc259..a1986a469 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -109,7 +109,7 @@ sub new {
 
 	my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
 	my $self = {
-		-inbox => $v2ibx,
+		ibx => $v2ibx,
 		im => undef, #  PublicInbox::Import
 		parallel => 1,
 		transact_bytes => 0,
@@ -149,7 +149,7 @@ sub init_inbox {
 # mimics Import::add and wraps it for v2
 sub add {
 	my ($self, $eml, $check_cb) = @_;
-	$self->{-inbox}->with_umask(\&_add, $self, $eml, $check_cb);
+	$self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
 }
 
 # indexes a message, returns true if checkpointing is needed
@@ -169,7 +169,7 @@ sub _add {
 
 	# spam check:
 	if ($check_cb) {
-		$mime = $check_cb->($mime, $self->{-inbox}) or return;
+		$mime = $check_cb->($mime, $self->{ibx}) or return;
 	}
 
 	# All pipes (> $^F) known to Perl 5.6+ have FD_CLOEXEC set,
@@ -218,7 +218,7 @@ sub v2_num_for {
 		# AltId may pre-populate article numbers (e.g. X-Mail-Count
 		# or NNTP article number), use that article number if it's
 		# not in Over.
-		my $altid = $self->{-inbox}->{altid};
+		my $altid = $self->{ibx}->{altid};
 		if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) {
 			my $num = $self->{mm}->num_for($mid);
 
@@ -293,7 +293,7 @@ sub _idx_init { # with_umask callback
 	# Now that all subprocesses are up, we can open the FDs
 	# for SQLite:
 	my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
-		"$self->{-inbox}->{inboxdir}/msgmap.sqlite3", 1);
+		"$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1);
 	$mm->{dbh}->begin_work;
 }
 
@@ -301,7 +301,7 @@ sub _idx_init { # with_umask callback
 sub idx_init {
 	my ($self, $opt) = @_;
 	return if $self->{idx_shards};
-	my $ibx = $self->{-inbox};
+	my $ibx = $self->{ibx};
 
 	# do not leak read-only FDs to child processes, we only have these
 	# FDs for duplicate detection so they should not be
@@ -329,7 +329,7 @@ sub idx_init {
 sub _replace_oids ($$$) {
 	my ($self, $mime, $replace_map) = @_;
 	$self->done;
-	my $pfx = "$self->{-inbox}->{inboxdir}/git";
+	my $pfx = "$self->{ibx}->{inboxdir}/git";
 	my $rewrites = []; # epoch => commit
 	my $max = $self->{epoch_max};
 
@@ -450,7 +450,7 @@ sub rewrite_internal ($$;$$$) {
 # (retval[2]) is not part of the stable API shared with Import->remove
 sub remove {
 	my ($self, $eml, $cmt_msg) = @_;
-	my $r = $self->{-inbox}->with_umask(\&rewrite_internal,
+	my $r = $self->{ibx}->with_umask(\&rewrite_internal,
 						$self, $eml, $cmt_msg);
 	defined($r) && defined($r->[0]) ? @$r: undef;
 }
@@ -458,7 +458,7 @@ sub remove {
 sub _replace ($$;$$) {
 	my ($self, $old_eml, $new_eml, $sref) = @_;
 	my $arg = [ $self, $old_eml, undef, $new_eml, $sref ];
-	my $rewritten = $self->{-inbox}->with_umask(\&rewrite_internal,
+	my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal,
 			$self, $old_eml, undef, $new_eml, $sref) or return;
 
 	my $rewrites = $rewritten->{rewrites};
@@ -484,7 +484,7 @@ sub git_hash_raw ($$) {
 	my ($self, $raw) = @_;
 	# grab the expected OID we have to reindex:
 	pipe(my($in, $w)) or die "pipe: $!";
-	my $git_dir = $self->{-inbox}->git->{git_dir};
+	my $git_dir = $self->{ibx}->git->{git_dir};
 	my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)];
 	my $r = popen_rd($cmd, undef, { 0 => $in });
 	print $w $$raw or die "print \$w: $!";
@@ -550,11 +550,11 @@ W: $list
 	}
 
 	# make sure we really got the OID:
-	my ($blob, $type, $bytes) = $self->{-inbox}->git->check($expect_oid);
+	my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid);
 	$blob eq $expect_oid or die "BUG: $expect_oid not found after replace";
 
 	# don't leak FDs to Xapian:
-	$self->{-inbox}->git->cleanup;
+	$self->{ibx}->git->cleanup;
 
 	# reindex modified messages:
 	for my $smsg (@$need_reindex) {
@@ -674,14 +674,14 @@ sub done {
 	my $nbytes = $self->{total_bytes};
 	$self->{total_bytes} = 0;
 	$self->lock_release(!!$nbytes) if $shards;
-	$self->{-inbox}->git->cleanup;
+	$self->{ibx}->git->cleanup;
 }
 
 sub fill_alternates ($$) {
 	my ($self, $epoch) = @_;
 
-	my $pfx = "$self->{-inbox}->{inboxdir}/git";
-	my $all = "$self->{-inbox}->{inboxdir}/all.git";
+	my $pfx = "$self->{ibx}->{inboxdir}/git";
+	my $all = "$self->{ibx}->{inboxdir}/all.git";
 	PublicInbox::Import::init_bare($all) unless -d $all;
 	my $info_dir = "$all/objects/info";
 	my $alt = "$info_dir/alternates";
@@ -726,7 +726,7 @@ sub fill_alternates ($$) {
 
 sub git_init {
 	my ($self, $epoch) = @_;
-	my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git";
+	my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git";
 	PublicInbox::Import::init_bare($git_dir);
 	my @cmd = (qw/git config/, "--file=$git_dir/config",
 			'include.path', '../../all.git/config');
@@ -738,7 +738,7 @@ sub git_init {
 sub git_dir_latest {
 	my ($self, $max) = @_;
 	$$max = -1;
-	my $pfx = "$self->{-inbox}->{inboxdir}/git";
+	my $pfx = "$self->{ibx}->{inboxdir}/git";
 	return unless -d $pfx;
 	my $latest;
 	opendir my $dh, $pfx or die "opendir $pfx: $!\n";
@@ -790,7 +790,7 @@ sub importer {
 
 sub import_init {
 	my ($self, $git, $packed_bytes, $tmp) = @_;
-	my $im = PublicInbox::Import->new($git, undef, undef, $self->{-inbox});
+	my $im = PublicInbox::Import->new($git, undef, undef, $self->{ibx});
 	$im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR);
 	$im->{lock_path} = undef;
 	$im->{path_type} = 'v2';
@@ -823,8 +823,7 @@ sub get_blob ($$) {
 		return $msg if $msg;
 	}
 	# older message, should be in alternates
-	my $ibx = $self->{-inbox};
-	$ibx->msg_by_smsg($smsg);
+	$self->{ibx}->msg_by_smsg($smsg);
 }
 
 sub content_exists ($$$) {
@@ -881,7 +880,7 @@ sub reindex_checkpoint ($$$) {
 
 sub reindex_oid ($$$$) {
 	my ($self, $sync, $git, $oid) = @_;
-	return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
+	return if PublicInbox::SearchIdx::too_big($self, $oid);
 	my ($num, $mid0, $len);
 	my $msgref = $git->cat_file($oid, \$len);
 	return if $len == 0; # purged
@@ -968,7 +967,7 @@ sub update_last_commit ($$$$) {
 	last_epoch_commit($self, $i, $cmt);
 }
 
-sub git_dir_n ($$) { "$_[0]->{-inbox}->{inboxdir}/git/$_[1].git" }
+sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" }
 
 sub last_commits ($$) {
 	my ($self, $epoch_max) = @_;
@@ -1077,7 +1076,7 @@ sub sync_prepare ($$$) {
 	my ($self, $sync, $epoch_max) = @_;
 	my $pr = $sync->{-opt}->{-progress};
 	my $regen_max = 0;
-	my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master';
+	my $head = $self->{ibx}->{ref_head} || 'refs/heads/master';
 
 	# reindex stops at the current heads and we later rerun index_sync
 	# without {reindex}
@@ -1108,7 +1107,7 @@ sub sync_prepare ($$$) {
 	# our code and blindly injects "d" file history into git repos
 	if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
 		warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
-		my $git = $self->{-inbox}->git;
+		my $git = $self->{ibx}->git;
 		for my $oid (@leftovers) {
 			$oid = unpack('H*', $oid);
 			$self->{current_info} = "leftover $oid";

  parent reply	other threads:[~2020-07-24  5:56 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-24  5:55 [PATCH 00/20] indexing changes and new features Eric Wong
2020-07-24  5:55 ` [PATCH 01/20] index: support --rethread switch to fix old indices Eric Wong
2020-07-24  5:55 ` [PATCH 02/20] v2: index forwards (via `git log --reverse') Eric Wong
2020-07-24  5:55 ` [PATCH 03/20] v2writable: introduce idx_stack Eric Wong
2020-07-24  5:55 ` [PATCH 04/20] v2writable: index_sync: reduce fill_alternates calls Eric Wong
2020-07-24  5:55 ` [PATCH 05/20] v2writable: move {autime} and {cotime} into $sync state Eric Wong
2020-07-24  5:55 ` [PATCH 06/20] v2writable: allow >= 40 byte git object IDs Eric Wong
2020-07-24  5:55 ` [PATCH 07/20] v2writable: drop "EPOCH.git indexing $RANGE" progress Eric Wong
2020-07-24  5:55 ` Eric Wong [this message]
2020-07-24  5:55 ` [PATCH 09/20] search: avoid copying {inboxdir} Eric Wong
2020-07-24  5:55 ` [PATCH 10/20] v2writable: use read-only PublicInbox::Git for cat_file Eric Wong
2020-07-24  5:55 ` [PATCH 11/20] v2writable: get rid of {reindex_pipe} field Eric Wong
2020-07-24  5:55 ` [PATCH 12/20] v2writable: clarify "epoch" comment Eric Wong
2020-07-24  5:55 ` [PATCH 13/20] xapcmd: set {from} properly for v1 inboxes Eric Wong
2020-07-24  5:56 ` [PATCH 14/20] searchidx: rename _xdb_{acquire,release} => idx_ Eric Wong
2020-07-24  5:56 ` [PATCH 15/20] searchidx: make v1 indexing closer to v2 Eric Wong
2020-07-24  5:56 ` [PATCH 16/20] index+xcpdb: support --no-sync flag Eric Wong
2020-07-24  5:56 ` [PATCH 17/20] v2writable: share log2stack code with v1 Eric Wong
2020-07-24  5:56 ` [PATCH 18/20] searchidx: support async git check Eric Wong
2020-07-24  5:56 ` [PATCH 19/20] searchidx: $batch_cb => v1_checkpoint Eric Wong
2020-07-24  5:56 ` [PATCH 20/20] v2writable: {unindexed} belongs in $sync state Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724055606.27332-9-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 08/20] use consistent {ibx} field for writable code paths' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).