From 75ffc6a266699e465471adf5992d36a1db8dc1ae Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 15 Dec 2020 02:02:22 +0000
Subject: extsearchidx: simplify reindex code paths

Since we're inside a Xapian transaction, calling ->index_raw
followed by ->shard_add_eidx_info calls on the same docid
doesn't seem to hurt indexing performance. It definitely reduces
FS read traffic and IPC from git at the cost of some more IPC
between the parent and workers. Nevertheless, the code and FD
reductions seem worth it.
---
 lib/PublicInbox/SearchIdxShard.pm | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'lib/PublicInbox/SearchIdxShard.pm')

diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index b6eef6bd..ee00858b 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -94,8 +94,6 @@ sub shard_worker_loop ($$$$$) {
 			my $over_fn = $1;
 			$over_fn =~ tr/\0/\n/;
 			$self->over_check(PublicInbox::Over->new($over_fn));
-		} elsif ($line =~ /\AE ([0-9]+)\n/) {
-			$self->reindex_docid($1 + 0);
 		} else {
 			chomp $line;
 			my $eidx_key;
@@ -124,9 +122,9 @@ sub shard_worker_loop ($$$$$) {
 }
 
 sub index_raw {
-	my ($self, $msgref, $eml, $smsg, $ibx) = @_;
+	my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
 	if (my $w = $self->{w}) {
-		my @ekey = $ibx ? ('X='.$ibx->eidx_key."\0") : ();
+		my @ekey = defined($eidx_key) ? ("X=$eidx_key\0") : ();
 		$msgref //= \($eml->as_string);
 		$smsg->{raw_bytes} //= length($$msgref);
 		# mid must be last, it can contain spaces (but not LF)
@@ -140,7 +138,7 @@ sub index_raw {
 			$eml = PublicInbox::Eml->new($msgref);
 		}
 		$self->begin_txn_lazy;
-		$smsg->{eidx_key} = $ibx->eidx_key if $ibx;
+		$smsg->{eidx_key} = $eidx_key if defined $eidx_key;
 		$self->add_message($eml, $smsg);
 	}
 }
@@ -225,13 +223,4 @@ sub shard_over_check {
 	}
 }
 
-sub shard_reindex_docid {
-	my ($self, $docid) = @_;
-	if (my $w = $self->{w}) {
-		print $w "E $docid\n" or die "failed to write to shard: $!";
-	} else {
-		$self->reindex_docid($docid);
-	}
-}
-
 1;
-- 
cgit v1.2.3-24-ge0c7