user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 12/11] solver: async blob retrieval for diff extraction
  @ 2020-09-10  1:51  7% ` Eric Wong
  0 siblings, 0 replies; 1+ results
From: Eric Wong @ 2020-09-10  1:51 UTC (permalink / raw)
  To: meta

Like the rest of the WWW code, public-inbox-httpd now uses
git_async_cat to retrieve blobs without blocking the event loop.
This improves fairness when git blobs are on slow storage and
allows us to take better advantage of SMP systems.
---
 lib/PublicInbox/SolverGit.pm | 85 +++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index c54d6d54..ae3997ca 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -16,6 +16,8 @@ use PublicInbox::Git qw(git_unquote git_quote);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Qspawn;
 use PublicInbox::Tmpfile;
+use PublicInbox::GitAsyncCat;
+use PublicInbox::Eml;
 use URI::Escape qw(uri_escape_utf8);
 
 # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
@@ -540,6 +542,47 @@ sub try_harder {
 	die "E: $@" if $@;
 }
 
+sub extract_diffs_done {
+	my ($self, $want) = @_;
+
+	delete $want->{try_smsgs};
+	delete $want->{cur_ibx};
+
+	my $diffs = delete $self->{tmp_diffs};
+	if (scalar @$diffs) {
+		unshift @{$self->{patches}}, @$diffs;
+		dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t",
+			map { di_url($self, $_) } @$diffs));
+
+		# good, we can find a path to the oid we $want, now
+		# let's see if we need to apply more patches:
+		my $di = $diffs->[0];
+		my $src = $di->{oid_a};
+
+		unless ($src =~ /\A0+\z/) {
+			# we have to solve it using another oid, fine:
+			my $job = { oid_b => $src, path_b => $di->{path_a} };
+			push @{$self->{todo}}, $job;
+		}
+		goto \&next_step; # onto the next todo item
+	}
+	goto \&try_harder;
+}
+
+sub extract_diff_async {
+	my ($bref, $oid, $type, $size, $x) = @_;
+	my ($self, $want, $smsg) = @$x;
+	if (defined($oid)) {
+		$smsg->{blob} eq $oid or
+				ERR($self, "BUG: $smsg->{blob} != $oid");
+		PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1);
+	}
+
+	scalar(@{$want->{try_smsgs}}) ?
+		retry_current($self, $want) :
+		extract_diffs_done($self, $want);
+}
+
 sub resolve_patch ($$) {
 	my ($self, $want) = @_;
 
@@ -550,39 +593,19 @@ sub resolve_patch ($$) {
 
 	if (my $msgs = $want->{try_smsgs}) {
 		my $smsg = shift @$msgs;
-		if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) {
-			$eml->each_part(\&extract_diff,
-					[ $self, $want, $smsg ], 1);
-		}
-
-		# try the remaining smsgs later
-		goto \&retry_current if scalar @$msgs;
-
-		delete $want->{try_smsgs};
-		delete $want->{cur_ibx};
-
-		my $diffs = delete $self->{tmp_diffs};
-		if (scalar @$diffs) {
-			unshift @{$self->{patches}}, @$diffs;
-			dbg($self, "found $cur_want in " .  join(" ||\n\t",
-				map { di_url($self, $_) } @$diffs));
-
-			# good, we can find a path to the oid we $want, now
-			# lets see if we need to apply more patches:
-			my $di = $diffs->[0];
-			my $src = $di->{oid_a};
-
-			unless ($src =~ /\A0+\z/) {
-				# we have to solve it using another oid, fine:
-				my $job = {
-					oid_b => $src,
-					path_b => $di->{path_a},
-				};
-				push @{$self->{todo}}, $job;
+		if ($self->{psgi_env}->{'pi-httpd.async'}) {
+			return git_async_cat($want->{cur_ibx}->git,
+						$smsg->{blob},
+						\&extract_diff_async,
+						[$self, $want, $smsg]);
+		} else {
+			if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) {
+				$eml->each_part(\&extract_diff,
+						[ $self, $want, $smsg ], 1);
 			}
-			goto \&next_step; # onto the next todo item
 		}
-		goto \&try_harder;
+
+		goto(scalar @$msgs ? \&retry_current : \&extract_diffs_done);
 	}
 
 	# see if we can find the blob in an existing git repo:

^ permalink raw reply related	[relevance 7%]

Results 1-1 of 1 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-09-09  6:26     [PATCH 00/11] httpd: further reduce event loop monopolization Eric Wong
2020-09-10  1:51  7% ` [PATCH 12/11] solver: async blob retrieval for diff extraction Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).