From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 08/11] extmsg: prevent cross-inbox matches from hogging event loop Date: Wed, 9 Sep 2020 06:26:15 +0000 Message-ID: <20200909062618.5940-9-e@80x24.org> (raw) In-Reply-To: <20200909062618.5940-1-e@80x24.org> With many inboxes, checking multiple SQLite repos will be slow and time-consuming, so ensure we can schedule it fairly between multiple inboxes. --- lib/PublicInbox/ExtMsg.pm | 101 ++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 929737f1..ce1a47bb 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -74,69 +74,106 @@ sub search_partial ($$) { } sub ext_msg_i { - my ($other, $cur, $mid, $ibxs, $found) = @_; + my ($other, $ctx) = @_; - return if $other->{name} eq $cur->{name} || !$other->base_url; + return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url; my $mm = $other->mm or return; # try to find the URL with Msgmap to avoid forking - my $num = $mm->num_for($mid); + my $num = $mm->num_for($ctx->{mid}); if (defined $num) { - push @$found, $other; + push @{$ctx->{found}}, $other; } else { # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the # message in the current repo - push @$ibxs, $other; + push @{$ctx->{again}}, $other; + } +} + +sub ext_msg_step { + my ($pi_cfg, $section, $ctx) = @_; + if (defined($section)) { + return if $section !~ m!\Apublicinbox\.([^/]+)\z!; + my $ibx = $pi_cfg->lookup_name($1) or return; + ext_msg_i($ibx, $ctx); + } else { # undef == "EOF" + finalize_exact($ctx); } } sub ext_msg { my ($ctx) = @_; - my $cur = $ctx->{-inbox}; - my $mid = $ctx->{mid}; + sub { + $ctx->{-wcb} = $_[0]; # HTTP server write callback + + if ($ctx->{env}->{'pi-httpd.async'}) { + require PublicInbox::ConfigIter; + my $iter = PublicInbox::ConfigIter->new( + $ctx->{www}->{pi_config}, + \&ext_msg_step, $ctx); + 
$iter->event_step; + } else { + $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx); + finalize_exact($ctx); + } + }; +} - eval { require PublicInbox::Msgmap }; - my $ibxs = []; - my $found = []; +# called via PublicInbox::DS->EventLoop +sub event_step { + my ($ctx, $sync) = @_; + # can't find a partial match in current inbox, try the others: + my $ibx = shift @{$ctx->{again}} or goto \&finalize_partial; + my $mids = search_partial($ibx, $ctx->{mid}) or + return ($sync ? undef : PublicInbox::DS::requeue($ctx)); + $ctx->{n_partial} += scalar(@$mids); + push @{$ctx->{partial}}, [ $ibx, $mids ]; + $ctx->{n_partial} >= PARTIAL_MAX ? goto(\&finalize_partial) + : ($sync ? undef : PublicInbox::DS::requeue($ctx)); +} - $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, - $cur, $mid, $ibxs, $found); +sub finalize_exact { + my ($ctx) = @_; - return exact($ctx, $found, $mid) if @$found; + return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found}; # fall back to partial MID matching - my @partial; - my $n_partial = 0; + my $mid = $ctx->{mid}; + my $cur = $ctx->{-inbox}; my $mids = search_partial($cur, $mid); if ($mids) { - $n_partial = scalar(@$mids); - push @partial, [ $cur, $mids ]; - } - - # can't find a partial match in current inbox, try the others: - if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) { - foreach my $ibx (@$ibxs) { - $mids = search_partial($ibx, $mid) or next; - $n_partial += scalar(@$mids); - push @partial, [ $ibx, $mids]; - last if $n_partial >= PARTIAL_MAX; + $ctx->{n_partial} = scalar(@$mids); + push @{$ctx->{partial}}, [ $cur, $mids ]; + } elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) { + bless $ctx, __PACKAGE__; + if ($ctx->{env}->{'pi-httpd.async'}) { + $ctx->event_step; + return; } + + # synchronous fall-through + $ctx->event_step while @{$ctx->{again}}; } + goto \&finalize_partial; +} +sub finalize_partial { + my ($ctx) = @_; + my $mid = $ctx->{mid}; my $code = 404; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = 
"<$html> not found"; my $s = "<pre>Message-ID <$html>\nnot found\n"; - if ($n_partial) { + if (my $n_partial = $ctx->{n_partial}) { $code = 300; my $es = $n_partial == 1 ? '' : 'es'; $n_partial .= '+' if ($n_partial == PARTIAL_MAX); $s .= "\n$n_partial partial match$es found:\n\n"; - my $cur_name = $cur->{name}; - foreach my $pair (@partial) { + my $cur_name = $ctx->{-inbox}->{name}; + foreach my $pair (@{$ctx->{partial}}) { my ($ibx, $res) = @$pair; my $env = $ctx->{env} if $ibx->{name} eq $cur_name; my $u = $ibx->base_url($env) or next; @@ -155,7 +192,7 @@ sub ext_msg { $ctx->{-html_tip} = $s .= '</pre>'; $ctx->{-title_html} = $title; $ctx->{-upfx} = '../'; - html_oneshot($ctx, $code); + $ctx->{-wcb}->(html_oneshot($ctx, $code)); } sub ext_urls { @@ -177,7 +214,9 @@ sub ext_urls { } sub exact { - my ($ctx, $found, $mid) = @_; + my ($ctx) = @_; + my $mid = $ctx->{mid}; + my $found = $ctx->{found}; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> found in ";
next prev parent reply other threads:[~2020-09-09 6:26 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-09-09 6:26 [PATCH 00/11] httpd: further reduce event loop monopolization Eric Wong 2020-09-09 6:26 ` [PATCH 01/11] xt/solver: test with public-inbox-httpd, too Eric Wong 2020-09-09 6:26 ` [PATCH 02/11] solver: drop warnings, modernize use v5.10.1, use SEEK_SET Eric Wong 2020-09-09 6:26 ` [PATCH 03/11] use "\&" where possible when referring to subroutines Eric Wong 2020-09-09 6:26 ` [PATCH 04/11] www: manifest.js.gz generation no longer hogs event loop Eric Wong 2020-09-09 6:26 ` [PATCH 05/11] config: flatten each_inbox and iterate_start args Eric Wong 2020-09-09 6:26 ` [PATCH 06/11] config: split out iterator into separate object Eric Wong 2020-09-09 6:26 ` [PATCH 07/11] t/cgi.t: show stderr on failures Eric Wong 2020-09-09 6:26 ` Eric Wong [this message] 2020-09-09 6:26 ` [PATCH 09/11] wwwlisting: avoid hogging event loop Eric Wong 2020-09-09 6:26 ` [PATCH 10/11] solver: check one git coderepo and inbox at a time Eric Wong 2020-09-09 6:26 ` [PATCH 11/11] solver: break apart inbox blob retrieval Eric Wong 2020-09-10 1:51 ` [PATCH 12/11] solver: async blob retrieval for diff extraction Eric Wong
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style List information: http://public-inbox.org/README * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20200909062618.5940-9-e@80x24.org \ --to=e@80x24.org \ --cc=meta@public-inbox.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link.
user/dev discussion of public-inbox itself This inbox may be cloned and mirrored by anyone: git clone --mirror http://public-inbox.org/meta git clone --mirror http://czquwvybam4bgbro.onion/meta git clone --mirror http://hjrcffqmbrq6wope.onion/meta git clone --mirror http://ou63pmih66umazou.onion/meta # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V1 meta meta/ http://public-inbox.org/meta \ meta@public-inbox.org public-inbox-index meta Example config snippet for mirrors. Newsgroups are available over NNTP: nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta nntp://news.gmane.io/gmane.mail.public-inbox.general note: .onion URLs require Tor: https://www.torproject.org/ code repositories for the project(s) associated with this inbox: https://80x24.org/public-inbox.git AGPL code for this site: git clone https://public-inbox.org/public-inbox.git