about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdxShard.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2020-12-15 02:02:22 +0000
committer: Eric Wong <e@80x24.org> 2020-12-17 19:13:14 +0000
commit: 75ffc6a266699e465471adf5992d36a1db8dc1ae (patch)
tree: 874275015e175ebf12c9fad5858228021bba4ce5 /lib/PublicInbox/SearchIdxShard.pm
parent: c014cd93de1f2c73348db0e6531f93cf0f1be60f (diff)
download: public-inbox-75ffc6a266699e465471adf5992d36a1db8dc1ae.tar.gz
Since we're inside a Xapian transaction, calling ->index_raw
followed by ->shard_add_eidx_info calls on the same docid
doesn't seem to hurt indexing performance.  It definitely
reduces FS read traffic and IPC from git at the cost of some
more IPC between the parent and workers.  Nevertheless, the code
and FD reductions seem worth it.
Diffstat (limited to 'lib/PublicInbox/SearchIdxShard.pm')
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm 17
1 files changed, 3 insertions, 14 deletions
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index b6eef6bd..ee00858b 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -94,8 +94,6 @@ sub shard_worker_loop ($$$$$) {
                         my $over_fn = $1;
                         $over_fn =~ tr/\0/\n/;
                         $self->over_check(PublicInbox::Over->new($over_fn));
-                } elsif ($line =~ /\AE ([0-9]+)\n/) {
-                        $self->reindex_docid($1 + 0);
                 } else {
                         chomp $line;
                         my $eidx_key;
@@ -124,9 +122,9 @@ sub shard_worker_loop ($$$$$) {
 }
 
 sub index_raw {
-        my ($self, $msgref, $eml, $smsg, $ibx) = @_;
+        my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
         if (my $w = $self->{w}) {
-                my @ekey = $ibx ? ('X='.$ibx->eidx_key."\0") : ();
+                my @ekey = defined($eidx_key) ? ("X=$eidx_key\0") : ();
                 $msgref //= \($eml->as_string);
                 $smsg->{raw_bytes} //= length($$msgref);
                 # mid must be last, it can contain spaces (but not LF)
@@ -140,7 +138,7 @@ sub index_raw {
                         $eml = PublicInbox::Eml->new($msgref);
                 }
                 $self->begin_txn_lazy;
-                $smsg->{eidx_key} = $ibx->eidx_key if $ibx;
+                $smsg->{eidx_key} = $eidx_key if defined $eidx_key;
                 $self->add_message($eml, $smsg);
         }
 }
@@ -225,13 +223,4 @@ sub shard_over_check {
         }
 }
 
-sub shard_reindex_docid {
-        my ($self, $docid) = @_;
-        if (my $w = $self->{w}) {
-                print $w "E $docid\n" or die "failed to write to shard: $!";
-        } else {
-                $self->reindex_docid($docid);
-        }
-}
-
 1;