From dfbe27bb5533ef391b214692f25e2823b20064ac Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 24 Jul 2020 05:55:54 +0000 Subject: use consistent {ibx} field for writable code paths This is a step which makes our use of abbreviations more consistent when referring to PublicInbox::Inbox objects. We'll also be reducing the number of redundant fields in SearchIdx and V2Writable code paths to make the object graph easier-to-follow. --- lib/PublicInbox/Import.pm | 6 ++--- lib/PublicInbox/SearchIdx.pm | 31 +++++++++++++------------- lib/PublicInbox/SearchIdxShard.pm | 6 ++--- lib/PublicInbox/V2Writable.pm | 47 +++++++++++++++++++-------------------- 4 files changed, 44 insertions(+), 46 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index d565b0a0..b50c662c 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -35,7 +35,7 @@ sub new { ident => "$name <$email>", mark => 1, ref => $ref, - -inbox => $ibx, + ibx => $ibx, path_type => '2/38', # or 'v2' lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this bytes_added => 0, @@ -176,7 +176,7 @@ sub _update_git_info ($$) { run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env); } run_die([@cmd, 'update-server-info']); - my $ibx = $self->{-inbox}; + my $ibx = $self->{ibx}; ($ibx && $self->{path_type} eq '2/38') and eval { require PublicInbox::SearchIdx; my $s = PublicInbox::SearchIdx->new($ibx); @@ -385,7 +385,7 @@ sub add { # spam check: if ($check_cb) { - $mime = $check_cb->($mime, $self->{-inbox}) or return; + $mime = $check_cb->($mime, $self->{ibx}) or return; } my $blob = $self->{mark}++; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index e641ffd4..4b1b1736 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -50,8 +50,7 @@ sub new { $ibx = PublicInbox::InboxWritable->new($ibx); my $self = bless { inboxdir => $inboxdir, - -inbox => $ibx, - git => $ibx->git, + ibx => $ibx, -altid => $altid, ibx_ver => $version, indexlevel => $indexlevel, @@ -548,14 +547,14 @@ sub unindex_both { # git->cat_async callback sub index_sync { my ($self, $opts) = @_; delete $self->{lock_path} if $opts->{-skip_lock}; - $self->{-inbox}->with_umask(\&_index_sync, $self, $opts); + $self->{ibx}->with_umask(\&_index_sync, $self, $opts); } -sub too_big ($$$) { - my ($self, $git, $oid) = @_; +sub too_big ($$) { + my ($self, $oid) = @_; my $max_size = $self->{index_max_size} or return; - my (undef, undef, $size) = $git->check($oid); - die "E: bad $oid in $git->{git_dir}\n" if !defined($size); + my (undef, undef, $size) = $self->{ibx}->git->check($oid); + die "E: bad $oid in $self->{ibx}->{inboxdir}\n" if !defined($size); return if $size <= $max_size; warn "W: skipping $oid ($size > $max_size)\n"; 1; @@ -568,7 +567,7 @@ sub read_log { my $h40 = $hex .'{40}'; my $addmsg = qr!^:000000 100644 \S+ ($h40) A\t${hex}{2}/${hex}{38}$!; my $delmsg = qr!^:100644 000000 ($h40) \S+ D\t${hex}{2}/${hex}{38}$!; - my $git = $self->{git}; + my $git = $self->{ibx}->git; my $latest; my $max = $BATCH_BYTES; local $/ = "\n"; @@ -591,7 +590,7 @@ sub read_log { } next; } - next if too_big($self, $git, $blob); + next if too_big($self, $blob); $git->cat_async($blob, \&index_both, { %$sync }); if ($max <= 0) { $git->cat_async_wait; @@ -600,7 +599,7 @@ sub read_log { } } elsif ($line =~ /$delmsg/o) { my $blob = $1; - $D{$blob} = 1 unless too_big($self, $git, $blob); + $D{$blob} = 1 unless too_big($self, $blob); } elsif ($line =~ /^commit ($h40)/o) { $latest = $1; $newest ||= $latest; @@ -621,7 +620,7 @@ sub read_log { sub _git_log { my ($self, $opts, $range) = @_; - my $git = $self->{git}; + my $git = $self->{ibx}->git; if (index($range, '..') < 0) { # don't show annoying git errors to users who run -index @@ -681,7 +680,7 @@ sub is_ancestor ($$$) { sub need_update ($$$) { my ($self, $cur, $new) = @_; - my $git = $self->{git}; + my $git = $self->{ibx}->git; return 1 if $cur && !is_ancestor($git, $cur, $new); my $range = $cur eq '' ? $new : "$cur..$new"; chomp(my $n = $git->qx(qw(rev-list --count), $range)); @@ -701,7 +700,7 @@ sub _last_x_commit { $lx = $lm; } # Use last_commit from msgmap if it is older or unset - if (!$lm || ($lx && $lm && is_ancestor($self->{git}, $lm, $lx))) { + if (!$lm || ($lx && $lm && is_ancestor($self->{ibx}->git, $lm, $lx))) { $lx = $lm; } $lx; @@ -718,7 +717,7 @@ sub _index_sync { my ($self, $opts) = @_; my $tip = $opts->{ref} || 'HEAD'; my ($last_commit, $lx, $xlog); - my $git = $self->{git}; + my $git = $self->{ibx}->git; $git->batch_prepare; my $pr = $opts->{-progress}; @@ -830,7 +829,7 @@ sub _begin_txn { sub begin_txn_lazy { my ($self) = @_; - $self->{-inbox}->with_umask(\&_begin_txn, $self) if !$self->{txn}; + $self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn}; } # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard) @@ -860,7 +859,7 @@ sub _commit_txn { sub commit_txn_lazy { my ($self) = @_; delete($self->{txn}) and - $self->{-inbox}->with_umask(\&_commit_txn, $self); + $self->{ibx}->with_umask(\&_commit_txn, $self); } sub worker_done { diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 54426881..fd34e487 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -11,14 +11,14 @@ use IO::Handle (); # autoflush use PublicInbox::Eml; sub new { - my ($class, $v2writable, $shard) = @_; - my $ibx = $v2writable->{-inbox}; + my ($class, $v2w, $shard) = @_; + my $ibx = $v2w->{ibx}; my $self = $class->SUPER::new($ibx, 1, $shard); # create the DB before forking: $self->_xdb_acquire; $self->set_indexlevel; $self->_xdb_release; - $self->spawn_worker($v2writable, $shard) if $v2writable->{parallel}; + $self->spawn_worker($v2w, $shard) if $v2w->{parallel}; $self; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 2ff2fc25..a1986a46 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -109,7 +109,7 @@ sub new { my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION; my $self = { - -inbox => $v2ibx, + ibx => $v2ibx, im => undef, # PublicInbox::Import parallel => 1, transact_bytes => 0, @@ -149,7 +149,7 @@ sub init_inbox { # mimics Import::add and wraps it for v2 sub add { my ($self, $eml, $check_cb) = @_; - $self->{-inbox}->with_umask(\&_add, $self, $eml, $check_cb); + $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb); } # indexes a message, returns true if checkpointing is needed @@ -169,7 +169,7 @@ sub _add { # spam check: if ($check_cb) { - $mime = $check_cb->($mime, $self->{-inbox}) or return; + $mime = $check_cb->($mime, $self->{ibx}) or return; } # All pipes (> $^F) known to Perl 5.6+ have FD_CLOEXEC set, @@ -218,7 +218,7 @@ sub v2_num_for { # AltId may pre-populate article numbers (e.g. X-Mail-Count # or NNTP article number), use that article number if it's # not in Over. - my $altid = $self->{-inbox}->{altid}; + my $altid = $self->{ibx}->{altid}; if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) { my $num = $self->{mm}->num_for($mid); @@ -293,7 +293,7 @@ sub _idx_init { # with_umask callback # Now that all subprocesses are up, we can open the FDs # for SQLite: my $mm = $self->{mm} = PublicInbox::Msgmap->new_file( - "$self->{-inbox}->{inboxdir}/msgmap.sqlite3", 1); + "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1); $mm->{dbh}->begin_work; } @@ -301,7 +301,7 @@ sub _idx_init { # with_umask callback sub idx_init { my ($self, $opt) = @_; return if $self->{idx_shards}; - my $ibx = $self->{-inbox}; + my $ibx = $self->{ibx}; # do not leak read-only FDs to child processes, we only have these # FDs for duplicate detection so they should not be @@ -329,7 +329,7 @@ sub idx_init { sub _replace_oids ($$$) { my ($self, $mime, $replace_map) = @_; $self->done; - my $pfx = "$self->{-inbox}->{inboxdir}/git"; + my $pfx = "$self->{ibx}->{inboxdir}/git"; my $rewrites = []; # epoch => commit my $max = $self->{epoch_max}; @@ -450,7 +450,7 @@ sub rewrite_internal ($$;$$$) { # (retval[2]) is not part of the stable API shared with Import->remove sub remove { my ($self, $eml, $cmt_msg) = @_; - my $r = $self->{-inbox}->with_umask(\&rewrite_internal, + my $r = $self->{ibx}->with_umask(\&rewrite_internal, $self, $eml, $cmt_msg); defined($r) && defined($r->[0]) ? @$r: undef; } @@ -458,7 +458,7 @@ sub remove { sub _replace ($$;$$) { my ($self, $old_eml, $new_eml, $sref) = @_; my $arg = [ $self, $old_eml, undef, $new_eml, $sref ]; - my $rewritten = $self->{-inbox}->with_umask(\&rewrite_internal, + my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal, $self, $old_eml, undef, $new_eml, $sref) or return; my $rewrites = $rewritten->{rewrites}; @@ -484,7 +484,7 @@ sub git_hash_raw ($$) { my ($self, $raw) = @_; # grab the expected OID we have to reindex: pipe(my($in, $w)) or die "pipe: $!"; - my $git_dir = $self->{-inbox}->git->{git_dir}; + my $git_dir = $self->{ibx}->git->{git_dir}; my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)]; my $r = popen_rd($cmd, undef, { 0 => $in }); print $w $$raw or die "print \$w: $!"; @@ -550,11 +550,11 @@ W: $list } # make sure we really got the OID: - my ($blob, $type, $bytes) = $self->{-inbox}->git->check($expect_oid); + my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid); $blob eq $expect_oid or die "BUG: $expect_oid not found after replace"; # don't leak FDs to Xapian: - $self->{-inbox}->git->cleanup; + $self->{ibx}->git->cleanup; # reindex modified messages: for my $smsg (@$need_reindex) { @@ -674,14 +674,14 @@ sub done { my $nbytes = $self->{total_bytes}; $self->{total_bytes} = 0; $self->lock_release(!!$nbytes) if $shards; - $self->{-inbox}->git->cleanup; + $self->{ibx}->git->cleanup; } sub fill_alternates ($$) { my ($self, $epoch) = @_; - my $pfx = "$self->{-inbox}->{inboxdir}/git"; - my $all = "$self->{-inbox}->{inboxdir}/all.git"; + my $pfx = "$self->{ibx}->{inboxdir}/git"; + my $all = "$self->{ibx}->{inboxdir}/all.git"; PublicInbox::Import::init_bare($all) unless -d $all; my $info_dir = "$all/objects/info"; my $alt = "$info_dir/alternates"; @@ -726,7 +726,7 @@ sub fill_alternates ($$) { sub git_init { my ($self, $epoch) = @_; - my $git_dir = "$self->{-inbox}->{inboxdir}/git/$epoch.git"; + my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git"; PublicInbox::Import::init_bare($git_dir); my @cmd = (qw/git config/, "--file=$git_dir/config", 'include.path', '../../all.git/config'); @@ -738,7 +738,7 @@ sub git_init { sub git_dir_latest { my ($self, $max) = @_; $$max = -1; - my $pfx = "$self->{-inbox}->{inboxdir}/git"; + my $pfx = "$self->{ibx}->{inboxdir}/git"; return unless -d $pfx; my $latest; opendir my $dh, $pfx or die "opendir $pfx: $!\n"; @@ -790,7 +790,7 @@ sub importer { sub import_init { my ($self, $git, $packed_bytes, $tmp) = @_; - my $im = PublicInbox::Import->new($git, undef, undef, $self->{-inbox}); + my $im = PublicInbox::Import->new($git, undef, undef, $self->{ibx}); $im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR); $im->{lock_path} = undef; $im->{path_type} = 'v2'; @@ -823,8 +823,7 @@ sub get_blob ($$) { return $msg if $msg; } # older message, should be in alternates - my $ibx = $self->{-inbox}; - $ibx->msg_by_smsg($smsg); + $self->{ibx}->msg_by_smsg($smsg); } sub content_exists ($$$) { @@ -881,7 +880,7 @@ sub reindex_checkpoint ($$$) { sub reindex_oid ($$$$) { my ($self, $sync, $git, $oid) = @_; - return if PublicInbox::SearchIdx::too_big($self, $git, $oid); + return if PublicInbox::SearchIdx::too_big($self, $oid); my ($num, $mid0, $len); my $msgref = $git->cat_file($oid, \$len); return if $len == 0; # purged @@ -968,7 +967,7 @@ sub update_last_commit ($$$$) { last_epoch_commit($self, $i, $cmt); } -sub git_dir_n ($$) { "$_[0]->{-inbox}->{inboxdir}/git/$_[1].git" } +sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" } sub last_commits ($$) { my ($self, $epoch_max) = @_; @@ -1077,7 +1076,7 @@ sub sync_prepare ($$$) { my ($self, $sync, $epoch_max) = @_; my $pr = $sync->{-opt}->{-progress}; my $regen_max = 0; - my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master'; + my $head = $self->{ibx}->{ref_head} || 'refs/heads/master'; # reindex stops at the current heads and we later rerun index_sync # without {reindex} @@ -1108,7 +1107,7 @@ sub sync_prepare ($$$) { # our code and blindly injects "d" file history into git repos if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); - my $git = $self->{-inbox}->git; + my $git = $self->{ibx}->git; for my $oid (@leftovers) { $oid = unpack('H*', $oid); $self->{current_info} = "leftover $oid"; -- cgit v1.2.3-24-ge0c7