diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Admin.pm | 8 | ||||
-rw-r--r-- | lib/PublicInbox/Fetch.pm | 145 | ||||
-rw-r--r-- | lib/PublicInbox/LEI.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMirror.pm | 95 |
4 files changed, 214 insertions, 40 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 2534958b..9ff59bca 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -372,4 +372,12 @@ sub index_prepare ($$) { $env; } +sub do_chdir ($) { + my $chdir = $_[0] // return; + for my $d (@$chdir) { + next if $d eq ''; # same as git(1) + chdir $d or die "cd $d: $!"; + } +} + 1; diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm new file mode 100644 index 00000000..d795731c --- /dev/null +++ b/lib/PublicInbox/Fetch.pm @@ -0,0 +1,145 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to "git fetch" remote public-inboxes +package PublicInbox::Fetch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC); +use URI (); +use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Admin; +use PublicInbox::LEI; +use PublicInbox::LeiCurl; +use PublicInbox::LeiMirror; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use File::Temp (); + +sub new { bless {}, __PACKAGE__ } + +sub fetch_cmd ($$) { + my ($lei, $opt) = @_; + my @cmd = qw(git); + $opt->{$_} = $lei->{$_} for (0..2); + # we support "-c $key=$val" for arbitrary git config options + # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 + push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []}; + push @cmd, 'fetch'; + push @cmd, '-q' if $lei->{opt}->{quiet}; + push @cmd, '-v' if $lei->{opt}->{verbose}; + @cmd; +} + +sub remote_url ($$) { + my ($lei, $dir) = @_; # TODO: support non-"origin"? + my $cmd = [ qw(git config remote.origin.url) ]; + my $fh = popen_rd($cmd, undef, { -C => $dir, 2 => $lei->{2} }); + my $url = <$fh>; + close $fh or return; + chomp $url; + $url; +} + +sub do_fetch { + my ($cls, $lei, $cd) = @_; + my $ibx_ver; + my $curl = PublicInbox::LeiCurl->new($lei) or return; + my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); + if ($ibx_ver == 1) { + my $url = remote_url($lei, $dir) // + die "E: $dir missing remote.origin.url\n"; + my $uri = URI->new($url); + my $torsocks = $curl->torsocks($lei, $uri); + my $opt = { -C => $dir }; + my $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); + $lei->child_error($cerr, "@$cmd failed") if $cerr; + return; + } + # v2: + opendir my $dh, "$dir/git" or die "opendir $dir/git: $!"; + my @epochs = sort { $b <=> $a } map { substr($_, 0, -4) + 0 } + grep(/\A[0-9]+\.git\z/, readdir($dh)); + my ($git_url, $epoch); + for my $nr (@epochs) { # try newest epoch, first + my $edir = "$dir/git/$nr.git"; + if (defined(my $url = remote_url($lei, $edir))) { + $git_url = $url; + $epoch = $nr; + last; + } else { + warn "W: $edir missing remote.origin.url\n"; + } + } + $git_url or die "Unable to determine git URL\n"; + my $inbox_url = $git_url; + $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or + $inbox_url =~ s!/$epoch(?:\.git)?/?\z!! or die <<EOM; +Unable to infer inbox URL from <$git_url> +EOM + $lei->qerr("# inbox URL: $inbox_url/"); + my $muri = URI->new("$inbox_url/manifest.js.gz"); + my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', + UNLINK => 1, DIR => $dir); + my $fn = $ft->filename; + my @opt = (qw(-R -o), $fn); + my $mf = "$dir/manifest.js.gz"; + my $m0; # current manifest.js.gz contents + if (open my $fh, '<', $mf) { + $m0 = eval { + PublicInbox::LeiMirror::decode_manifest($fh, $mf, $mf) + }; + $lei->err($@) if $@; + push @opt, '-z', $mf if defined($m0); + } + my $curl_cmd = $curl->for_uri($lei, $muri, @opt); + my $opt = {}; + $opt->{$_} = $lei->{$_} for (0..2); + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt); + return $lei->child_error($cerr, "@$curl_cmd failed") if $cerr; + return if !-s $ft; # 304 Not Modified via curl -z + + my $m1 = PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + my $mdiff = { %$m1 }; + + # filter out unchanged entries + while (my ($k, $v0) = each %{$m0 // {}}) { + my $cur = $m1->{$k} // next; + my $f0 = $v0->{fingerprint} // next; + my $f1 = $cur->{fingerprint} // next; + my $t0 = $v0->{modified} // next; + my $t1 = $cur->{modified} // next; + delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1; + } + my $ibx_uri = URI->new("$inbox_url/"); + my ($path_pfx, $v1_bare, @v2_epochs) = + PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); + defined($v1_bare) and die <<EOM; +E: got v1 `$v1_bare' when expecting v2 epoch(s) in <$muri>, WTF? +EOM + my @epoch_nr = sort { $a <=> $b } + map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @v2_epochs; + + # n.b. this expects all epochs are from the same host + my $torsocks = $curl->torsocks($lei, $muri); + for my $nr (@epoch_nr) { + my $dir = "$dir/git/$nr.git"; + my $cmd; + my $opt = {}; + if (-d $dir) { + $opt->{-C} = $dir; + $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + } else { + my $e_uri = $ibx_uri->clone; + $e_uri->path($ibx_uri->path."git/$nr.git"); + $cmd = [ @$torsocks, + PublicInbox::LeiMirror::clone_cmd($lei, $opt), + $$e_uri, $dir ]; + } + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); + return $lei->child_error($cerr, "@$cmd failed") if $cerr; + } + rename($fn, $mf) or die "E: rename($fn, $mf): $!\n"; + $ft->unlink_on_destroy(0); +} + +1; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index aff2bf19..6d5d3c03 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -468,6 +468,8 @@ sub x_it ($$) { $self->{pkt_op_p}->pkt_do('x_it', $code); } elsif ($self->{sock}) { # to lei(1) client send($self->{sock}, "x_it $code", MSG_EOR); + } elsif ($quit == \&CORE::exit) { # an admin command + exit($code >> 8); } # else ignore if client disconnected } @@ -511,7 +513,7 @@ sub fail ($$;$) { my ($self, $buf, $exit_code) = @_; $self->{failed}++; err($self, $buf) if defined $buf; - # calls fail_handler: + # calls fail_handler $self->{pkt_op_p}->pkt_do('!') if $self->{pkt_op_p}; x_it($self, ($exit_code // 1) << 8); undef; @@ -536,6 +538,8 @@ sub child_error { # passes non-fatal curl exit codes to user $self->{pkt_op_p}->pkt_do('child_error', $child_error); } elsif ($self->{sock}) { # to lei(1) client send($self->{sock}, "child_error $child_error", MSG_EOR); + } else { # non-lei admin command + $self->{child_error} ||= $child_error; } # else noop if client disconnected } diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 355813bd..c128d13d 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -8,6 +8,7 @@ use v5.10.1; use parent qw(PublicInbox::IPC); use IO::Uncompress::Gunzip qw(gunzip $GunzipError); use PublicInbox::Spawn qw(popen_rd spawn); +use File::Temp (); sub do_finish_mirror { # dwaitpid callback my ($arg, $pid) = @_; @@ -18,7 +19,9 @@ sub do_finish_mirror { # dwaitpid callback } elsif (!unlink($f)) { $lei->err("unlink($f): $!") unless $!{ENOENT}; } else { - $lei->add_external_finish($mrr->{dst}); + if ($lei->{cmd} ne 'public-inbox-clone') { + $lei->add_external_finish($mrr->{dst}); + } $lei->qerr("# mirrored $mrr->{src} => $mrr->{dst}"); } $lei->dclose; @@ -121,33 +124,38 @@ sub _try_config { sub index_cloned_inbox { my ($self, $iv) = @_; - my $ibx = delete($self->{ibx}) // { - address => [ 'lei@example.com' ], - version => $iv, - }; - $ibx->{inboxdir} = $self->{dst}; - PublicInbox::Inbox->new($ibx); - PublicInbox::InboxWritable->new($ibx); - my $opt = {}; my $lei = $self->{lei}; - for my $sw ($lei->index_opt) { - my ($k) = ($sw =~ /\A([\w-]+)/); - $opt->{$k} = $lei->{opt}->{$k}; + + # n.b. public-inbox-clone works w/o (SQLite || Xapian) + # lei is useless without Xapian + SQLite + if ($lei->{cmd} ne 'public-inbox-clone') { + my $ibx = delete($self->{ibx}) // { + address => [ 'lei@example.com' ], + version => $iv, + }; + $ibx->{inboxdir} = $self->{dst}; + PublicInbox::Inbox->new($ibx); + PublicInbox::InboxWritable->new($ibx); + my $opt = {}; + for my $sw ($lei->index_opt) { + my ($k) = ($sw =~ /\A([\w-]+)/); + $opt->{$k} = $lei->{opt}->{$k}; + } + # force synchronous dwaitpid for v2: + local $PublicInbox::DS::in_loop = 0; + my $cfg = PublicInbox::Config->new(undef, $lei->{2}); + my $env = PublicInbox::Admin::index_prepare($opt, $cfg); + local %ENV = (%ENV, %$env) if $env; + PublicInbox::Admin::progress_prepare($opt, $lei->{2}); + PublicInbox::Admin::index_inbox($ibx, undef, $opt); } - # force synchronous dwaitpid for v2: - local $PublicInbox::DS::in_loop = 0; - my $cfg = PublicInbox::Config->new(undef, $lei->{2}); - my $env = PublicInbox::Admin::index_prepare($opt, $cfg); - local %ENV = (%ENV, %$env) if $env; - PublicInbox::Admin::progress_prepare($opt, $lei->{2}); - PublicInbox::Admin::index_inbox($ibx, undef, $opt); open my $x, '>', "$self->{dst}/mirror.done"; # for do_finish_mirror } sub run_reap { my ($lei, $cmd, $opt) = @_; $lei->qerr("# @$cmd"); - $opt->{pgid} = 0; + $opt->{pgid} = 0 if $lei->{sock}; my $pid = spawn($cmd, undef, $opt); my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!"; @@ -205,6 +213,7 @@ sub deduce_epochs ($$) { my ($m, $path) = @_; my ($v1_bare, @v2_epochs); my $path_pfx = ''; + $path =~ s!/+\z!!; do { $v1_bare = $m->{$path}; @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m); @@ -213,6 +222,18 @@ sub deduce_epochs ($$) { ($path_pfx, $v1_bare, @v2_epochs); } +sub decode_manifest ($$$) { + my ($fh, $fn, $uri) = @_; + my $js; + my $gz = do { local $/; <$fh> } // die "slurp($fn): $!"; + gunzip(\$gz => \$js, MultiStream => 1) or + die "gunzip($uri): $GunzipError\n"; + my $m = eval { PublicInbox::Config->json->decode($js) }; + die "$uri: error decoding `$js': $@\n" if $@; + ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); + $m; +} + sub try_manifest { my ($self) = @_; my $uri = URI->new($self->{src}); @@ -221,26 +242,19 @@ sub try_manifest { my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path($path . '/manifest.js.gz'); - my $cmd = $curl->for_uri($lei, $uri); - $lei->qerr("# @$cmd"); - my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; - my ($fh, $pid) = popen_rd($cmd, undef, $opt); - my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); - my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!"; - close $fh; - waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!"; - @$reap = (); - if ($?) { - return try_scrape($self) if ($? >> 8) == 22; # 404 missing - return $lei->child_error($?, "@$cmd failed"); + my $pdir = $lei->rel2abs($self->{dst}); + $pdir =~ s!/[^/]+/?\z!!; + my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', + UNLINK => 1, DIR => $pdir); + my $fn = $ft->filename; + my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn); + my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} }; + my $cerr = run_reap($lei, $cmd, $opt); + if ($cerr) { + return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing + return $lei->child_error($cerr, "@$cmd failed"); } - my $js; - gunzip(\$gz => \$js, MultiStream => 1) or - die "gunzip($uri): $GunzipError"; - my $m = eval { PublicInbox::Config->json->decode($js) }; - die "$uri: error decoding `$js': $@" if $@; - ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); - + my $m = decode_manifest($ft, $fn, $uri); my ($path_pfx, $v1_bare, @v2_epochs) = deduce_epochs($m, $path); if (@v2_epochs) { # It may be possible to have v1 + v2 in parallel someday: @@ -254,6 +268,9 @@ EOM $uri->clone } @v2_epochs; clone_v2($self, \@v2_epochs); + my $fin = "$self->{dst}/manifest.js.gz"; + rename($fn, $fin) or die "E: rename($fn, $fin): $!"; + $ft->unlink_on_destroy(0); } elsif (defined $v1_bare) { clone_v1($self); } else { |