about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-05-28 19:47:23 +0000
committerEric Wong <e@80x24.org>2021-05-28 22:37:53 +0000
commit5944277a4dc986f9adf9ba69a1e6e44899ad98c1 (patch)
tree45c8fe978d381e61eb3e7658ef89fa51f7a2103d
parent82e141cdf4a00de174b0a919689d846bb387cbf5 (diff)
downloadpublic-inbox-5944277a4dc986f9adf9ba69a1e6e44899ad98c1.tar.gz
Xapian DBs may be modified by a parallel process while we're
reading it, and Xapian's MVCC model places the burden on readers
to retry operations.

We'll also have retry_reopen croak instead of die on errors,
which ought to help us track down some "Document not found"
errors I've occasionally seen when using "lei <q|up>".
-rw-r--r--lib/PublicInbox/LeiSearch.pm33
-rw-r--r--lib/PublicInbox/LeiXSearch.pm21
-rw-r--r--lib/PublicInbox/Search.pm7
3 files changed, 42 insertions, 19 deletions
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 9297d060..b09d1e45 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -18,7 +18,7 @@ sub num2docid ($$) {
         ($num - 1) * $nshard + $num % $nshard + 1;
 }
 
-sub msg_keywords {
+sub _msg_kw { # retry_reopen callback
         my ($self, $num) = @_; # num_or_mitem
         my $xdb = $self->xdb; # set {nshard};
         my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
@@ -27,13 +27,16 @@ sub msg_keywords {
         wantarray ? sort(keys(%$kw)) : $kw;
 }
 
-# returns undef if blob is unknown
-sub oid_keywords {
-        my ($self, $oidhex) = @_;
-        my @num = $self->over->blob_exists($oidhex) or return;
+sub msg_keywords {
+        my ($self, $num) = @_; # num_or_mitem
+        $self->retry_reopen(\&_msg_kw, $num);
+}
+
+sub _oid_kw { # retry_reopen callback
+        my ($self, $nums) = @_;
         my $xdb = $self->xdb; # set {nshard};
         my %kw;
-        for my $num (@num) { # there should only be one...
+        for my $num (@$nums) { # there should only be one...
                 my $doc = $xdb->get_document(num2docid($self, $num));
                 my $x = xap_terms('K', $doc);
                 %kw = (%kw, %$x);
@@ -41,10 +44,15 @@ sub oid_keywords {
         \%kw;
 }
 
-# lookup keywords+labels for external messages
-sub xsmsg_vmd {
+# returns undef if blob is unknown
+sub oid_keywords {
+        my ($self, $oidhex) = @_;
+        my @num = $self->over->blob_exists($oidhex) or return;
+        $self->retry_reopen(\&_oid_kw, \@num);
+}
+
+sub _xsmsg_vmd { # retry_reopen
         my ($self, $smsg, $want_label) = @_;
-        return if $smsg->{kw};
         my $xdb = $self->xdb; # set {nshard};
         my (%kw, %L, $doc, $x);
         $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
@@ -62,6 +70,13 @@ sub xsmsg_vmd {
         $smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
 }
 
+# lookup keywords+labels for external messages
+sub xsmsg_vmd {
+        my ($self, $smsg, $want_label) = @_;
+        return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw
+        $self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label);
+}
+
 # when a message has no Message-IDs at all, this is needed for
 # unsent Draft messages, at least
 sub content_key ($) {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 760f9718..2e548a7a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -67,17 +67,23 @@ sub remotes { @{$_[0]->{remotes} // []} }
 # called by PublicInbox::Search::xdb (usually via ->mset)
 sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
 
-sub mitem_kw ($$;$) {
-        my ($smsg, $mitem, $flagged) = @_;
-        my $kw = xap_terms('K', my $doc = $mitem->get_document);
+sub _mitem_kw { # retry_reopen callback
+        my ($srch, $smsg, $mitem, $flagged) = @_;
+        my $doc = $mitem->get_document;
+        my $kw = xap_terms('K', $doc);
         $kw->{flagged} = 1 if $flagged;
+        my $L = xap_terms('L', $doc);
         # we keep the empty {kw} array here to prevent expensive work in
         # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
         $smsg->{kw} = [ sort keys %$kw ];
-        my $L = xap_terms('L', $doc);
         $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
 }
 
+sub mitem_kw ($$$;$) {
+        my ($srch, $smsg, $mitem, $flagged) = @_;
+        $srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged);
+}
+
 # like over->get_art
 sub smsg_for {
         my ($self, $mitem) = @_;
@@ -90,7 +96,7 @@ sub smsg_for {
         my $smsg = $ibx->over->get_art($num);
         return if $smsg->{bytes} == 0; # external message
         if ($ibx->can('msg_keywords')) {
-                mitem_kw($smsg, $mitem);
+                mitem_kw($self, $smsg, $mitem);
         }
         $smsg;
 }
@@ -194,7 +200,8 @@ sub query_one_mset { # for --threads and l2m w/o sort
                                         my $mitem = delete $n2item{$n};
                                         next if $smsg->{bytes} == 0;
                                         if ($mitem && $can_kw) {
-                                                mitem_kw($smsg, $mitem, $fl);
+                                                mitem_kw($srch, $smsg, $mitem,
+                                                        $fl);
                                         } elsif ($mitem && $fl) {
                                                 # call ->xsmsg_vmd, later
                                                 $smsg->{lei_q_tt_flagged} = 1;
@@ -210,7 +217,7 @@ sub query_one_mset { # for --threads and l2m w/o sort
                                 my $mitem = $items[$i++];
                                 my $smsg = $over->get_art($n) or next;
                                 next if $smsg->{bytes} == 0;
-                                mitem_kw($smsg, $mitem, $fl) if $can_kw;
+                                mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw;
                                 $each_smsg->($smsg, $mitem);
                         }
                 }
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbcff2c3..59a5a3b0 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -9,6 +9,7 @@ use parent qw(Exporter);
 our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
 use POSIX qw(strftime);
+use Carp ();
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
@@ -405,16 +406,16 @@ sub retry_reopen {
                 # Exception: The revision being read has been discarded -
                 # you should call Xapian::Database::reopen()
                 if (ref($@) =~ /\bDatabaseModifiedError\b/) {
-                        warn "reopen try #$i on $@\n";
+                        warn "# reopen try #$i on $@\n";
                         reopen($self);
                 } else {
                         # let caller decide how to spew, because ExtMsg queries
                         # get wonky and trigger:
                         # "something terrible happened at .../Xapian/Enquire.pm"
-                        die;
+                        Carp::croak($@);
                 }
         }
-        die "Too many Xapian database modifications in progress\n";
+        Carp::croak("Too many Xapian database modifications in progress\n");
 }
 
 sub _do_enquire {