From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Cc: Eric Wong <e@80x24.org> Subject: [PATCH] search: retry document loading from Xapian Date: Sat, 10 Dec 2016 23:35:43 +0000 Message-ID: <20161210233543.15562-1-e@80x24.org> (raw) In addition to needing to retry enquire queries, we also need to protect document loading from the Xapian DB and retry on modification, as it seems to throw the same errors. Checking the $@ ref for Search::Xapian::DatabaseModifiedError is actually in the test suite for both the XS and SWIG Xapian bindings, so we should be good as far as forward/backwards compatibility. --- lib/PublicInbox/Search.pm | 15 ++++++++++----- lib/PublicInbox/SearchView.pm | 39 +++++++++++++++++++++++++++++---------- lib/PublicInbox/View.pm | 19 +++++++++++-------- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 5e6bfc6..24cb266 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -166,22 +166,27 @@ sub get_thread { _do_enquire($self, $qtid, $opts); } -sub _do_enquire { - my ($self, $query, $opts) = @_; +sub retry_reopen { + my ($self, $cb) = @_; my $ret; for (1..10) { - eval { $ret = _enquire_once($self, $query, $opts) }; + eval { $ret = $cb->() }; return $ret unless $@; # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() - if (index($@, 'Xapian::Database::reopen') >= 0) { + if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') { reopen($self); } else { - die $@; + die; } } } +sub _do_enquire { + my ($self, $query, $opts) = @_; + retry_reopen($self, sub { _enquire_once($self, $query, $opts) }); +} + sub _enquire_once { my ($self, $query, $opts) = @_; my $enquire = $self->enquire; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 6af151a..50a2c01 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -59,6 +59,17 @@ sub sres_top_html { 
PublicInbox::WwwStream->response($ctx, $code, $cb); } +# allow undef for individual doc loads... +sub load_doc_retry { + my ($srch, $mitem) = @_; + + eval { + $srch->retry_reopen(sub { + PublicInbox::SearchMsg->load_doc($mitem->get_document) + }); + } +} + # display non-threaded search results similar to what users expect from # regular WWW search engines: sub mset_summary { @@ -68,10 +79,18 @@ sub mset_summary { my $pad = length("$total"); my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); + my $srch = $ctx->{srch}; foreach my $m ($mset->items) { my $rank = sprintf("%${pad}d", $m->get_rank + 1); my $pct = $m->get_percent; - my $smsg = PublicInbox::SearchMsg->load_doc($m->get_document); + my $smsg = load_doc_retry($srch, $m); + unless ($smsg) { + eval { + $m = "$m ".$m->get_docid . " expired\n"; + $ctx->{env}->{'psgi.errors'}->print($m); + }; + next; + } my $s = ascii_html($smsg->subject); my $f = ascii_html($smsg->from_name); my $ts = PublicInbox::View::fmt_ts($smsg->ts); @@ -145,14 +164,14 @@ sub search_nav_bot { sub mset_thread { my ($ctx, $mset, $q) = @_; my %pct; - my @m = map { + my $msgs = $ctx->{srch}->retry_reopen(sub { [ map { my $i = $_; - my $m = PublicInbox::SearchMsg->load_doc($i->get_document); - $pct{$m->mid} = $i->get_percent; - $m; - } ($mset->items); + my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document); + $pct{$smsg->mid} = $i->get_percent; + $smsg; + } ($mset->items) ]}); - my $th = PublicInbox::SearchThread->new(\@m); + my $th = PublicInbox::SearchThread->new($msgs); $th->thread; if ($q->{r}) { # order by relevance $th->order(sub { @@ -175,12 +194,11 @@ sub mset_thread { $ctx->{prev_attr} = ''; $ctx->{prev_level} = 0; $ctx->{seen} = {}; - $ctx->{s_nr} = scalar(@m).'+ results'; + $ctx->{s_nr} = scalar(@$msgs).'+ results'; PublicInbox::View::walk_thread($th, $ctx, *PublicInbox::View::pre_thread); - my $msgs = \@m; my $mime; sub { return unless $msgs; @@ -217,9 +235,10 @@ sub adump { my $ibx = $ctx->{-inbox}; my @items = 
$mset->items; $ctx->{search_query} = $q; + my $srch = $ctx->{srch}; PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $x = shift @items) { - $x = PublicInbox::SearchMsg->load_doc($x->get_document); + $x = load_doc_retry($srch, $x); $x = $ibx->msg_by_smsg($x) and return Email::MIME->new($x); } diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index ec5f7e0..fa47a16 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -327,8 +327,9 @@ sub stream_thread ($$) { sub thread_html { my ($ctx) = @_; my $mid = $ctx->{mid}; - my $sres = $ctx->{srch}->get_thread($mid); - my $msgs = load_results($sres); + my $srch = $ctx->{srch}; + my $sres = $srch->get_thread($mid); + my $msgs = load_results($srch, $sres); my $nr = $sres->{total}; return missing_thread($ctx) if $nr == 0; my $skel = '<hr><pre>'; @@ -574,7 +575,8 @@ sub thread_skel { $ctx->{prev_attr} = ''; $ctx->{prev_level} = 0; $ctx->{dst} = $dst; - walk_thread(thread_results(load_results($sres)), $ctx, *skel_dump); + $sres = load_results($srch, $sres); + walk_thread(thread_results($sres), $ctx, *skel_dump); $ctx->{parent_msg} = $parent; } @@ -733,9 +735,9 @@ sub indent_for { } sub load_results { - my ($sres) = @_; - - [ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ]; + my ($srch, $sres) = @_; + my $msgs = delete $sres->{msgs}; + $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] }); } sub msg_timestamp { @@ -975,10 +977,11 @@ sub index_topics { my $opts = { offset => $off, limit => 200 }; $ctx->{order} = []; - my $sres = $ctx->{srch}->query('', $opts); + my $srch = $ctx->{srch}; + my $sres = $srch->query('', $opts); my $nr = scalar @{$sres->{msgs}}; if ($nr) { - $sres = load_results($sres); + $sres = load_results($srch, $sres); walk_thread(thread_results($sres), $ctx, *acc_topic); } $ctx->{-next_o} = $off+ $nr; -- EW
reply other threads:[~2016-12-10 23:35 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161210233543.15562-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.

Newsgroups are available over NNTP:

	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git