diff options
-rw-r--r-- | Documentation/public-inbox-clone.pod | 71 | ||||
-rw-r--r-- | Documentation/public-inbox-fetch.pod | 63 | ||||
-rw-r--r-- | MANIFEST | 5 | ||||
-rw-r--r-- | lib/PublicInbox/Admin.pm | 8 | ||||
-rw-r--r-- | lib/PublicInbox/Fetch.pm | 145 | ||||
-rw-r--r-- | lib/PublicInbox/LEI.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMirror.pm | 95 | ||||
-rwxr-xr-x | script/public-inbox-clone | 58 | ||||
-rwxr-xr-x | script/public-inbox-fetch | 35 | ||||
-rw-r--r-- | t/lei-mirror.t | 29 |
10 files changed, 475 insertions, 40 deletions
diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod new file mode 100644 index 00000000..fdb57663 --- /dev/null +++ b/Documentation/public-inbox-clone.pod @@ -0,0 +1,71 @@ +=head1 NAME + +public-inbox-clone - "git clone --mirror" wrapper + +=head1 SYNOPSIS + +public-inbox-clone INBOX_URL [INBOX_DIR] + +=head1 DESCRIPTION + +public-inbox-clone is a wrapper around C<git clone --mirror> for +making the initial clone of a remote HTTP(S) public-inbox. It +allows cloning multi-epoch v2 inboxes with a single command and +zero configuration. + +It does not run L<public-inbox-init(1)> nor +L<public-inbox-index(1)>. Those commands must be run separately +if serving/searching the mirror is required. As-is, +public-inbox-clone is suitable for creating a git-only backup. + +public-inbox-clone does not use nor require any extra +configuration files (not even C<~/.public-inbox/config>). + +L<public-inbox-fetch(1)> may be used to keep C<INBOX_DIR> +up-to-date. + +For v2 inboxes, it will create a C<$INBOX_DIR/manifest.js.gz> +file to speed up subsequent L<public-inbox-fetch(1)>. + +=head1 OPTIONS + +=over + +=item -q + +=item --quiet + +Quiets down progress messages, also passed to L<git-fetch(1)>. + +=item -v + +=item --verbose + +Increases verbosity, also passed to L<git-fetch(1)>. + +=item --torsocks=auto|no|yes + +=item --no-torsocks + +Whether to wrap L<git(1)> and L<curl(1)> commands with torsocks. 
+ +Default: C<auto> + +=back + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L<https://public-inbox.org/meta/> and +L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> + +=head1 COPYRIGHT + +Copyright all contributors L<mailto:meta@public-inbox.org> + +License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> + +=head1 SEE ALSO + +L<public-inbox-fetch(1)>, L<public-inbox-init(1)>, L<public-inbox-index(1)> diff --git a/Documentation/public-inbox-fetch.pod b/Documentation/public-inbox-fetch.pod new file mode 100644 index 00000000..7944fdcd --- /dev/null +++ b/Documentation/public-inbox-fetch.pod @@ -0,0 +1,63 @@ +=head1 NAME + +public-inbox-fetch - "git fetch" wrapper for v2 inbox mirrors + +=head1 SYNOPSIS + +public-inbox-fetch -C INBOX_DIR + +=head1 DESCRIPTION + +public-inbox-fetch updates git storage of public-inbox mirrors. +With v2 inboxes, it allows detection of new epochs and avoids +unnecessary traffic on old epochs. + +public-inbox-fetch does not use nor require any configuration +files of its own. + +It does not run L<public-inbox-index(1)>, making it suitable +for maintaining git-only backups. + +For v2 inboxes, it will maintain C<$INBOX_DIR/manifest.js.gz> +file to speed up future invocations. + +=head1 OPTIONS + +=over + +=item -q + +=item --quiet + +Quiets down progress messages, also passed to L<git-fetch(1)>. + +=item -v + +=item --verbose + +Increases verbosity, also passed to L<git-fetch(1)>. + +=item --torsocks=auto|no|yes + +=item --no-torsocks + +Whether to wrap L<git(1)> and L<curl(1)> commands with torsocks. 
+ +Default: C<auto> + +=back + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L<https://public-inbox.org/meta/> and +L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> + +=head1 COPYRIGHT + +Copyright all contributors L<mailto:meta@public-inbox.org> + +License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> + +=head1 SEE ALSO + +L<public-inbox-index(1)> @@ -51,6 +51,7 @@ Documentation/lei.pod Documentation/lei_design_notes.txt Documentation/marketing.txt Documentation/mknews.perl +Documentation/public-inbox-clone.pod Documentation/public-inbox-compact.pod Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod @@ -58,6 +59,7 @@ Documentation/public-inbox-daemon.pod Documentation/public-inbox-edit.pod Documentation/public-inbox-extindex-format.pod Documentation/public-inbox-extindex.pod +Documentation/public-inbox-fetch.pod Documentation/public-inbox-glossary.pod Documentation/public-inbox-httpd.pod Documentation/public-inbox-imapd.pod @@ -163,6 +165,7 @@ lib/PublicInbox/ExtSearchIdx.pm lib/PublicInbox/FakeImport.pm lib/PublicInbox/FakeInotify.pm lib/PublicInbox/Feed.pm +lib/PublicInbox/Fetch.pm lib/PublicInbox/Filter/Base.pm lib/PublicInbox/Filter/Gmane.pm lib/PublicInbox/Filter/Mirror.pm @@ -329,10 +332,12 @@ sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs script/lei +script/public-inbox-clone script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit script/public-inbox-extindex +script/public-inbox-fetch script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 2534958b..9ff59bca 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -372,4 +372,12 @@ sub index_prepare ($$) { $env; } +sub do_chdir ($) { + my $chdir = $_[0] // return; + for my $d 
(@$chdir) { + next if $d eq ''; # same as git(1) + chdir $d or die "cd $d: $!"; + } +} + 1; diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm new file mode 100644 index 00000000..d795731c --- /dev/null +++ b/lib/PublicInbox/Fetch.pm @@ -0,0 +1,145 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to "git fetch" remote public-inboxes +package PublicInbox::Fetch; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC); +use URI (); +use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Admin; +use PublicInbox::LEI; +use PublicInbox::LeiCurl; +use PublicInbox::LeiMirror; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use File::Temp (); + +sub new { bless {}, __PACKAGE__ } + +sub fetch_cmd ($$) { + my ($lei, $opt) = @_; + my @cmd = qw(git); + $opt->{$_} = $lei->{$_} for (0..2); + # we support "-c $key=$val" for arbitrary git config options + # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 + push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []}; + push @cmd, 'fetch'; + push @cmd, '-q' if $lei->{opt}->{quiet}; + push @cmd, '-v' if $lei->{opt}->{verbose}; + @cmd; +} + +sub remote_url ($$) { + my ($lei, $dir) = @_; # TODO: support non-"origin"? 
+ my $cmd = [ qw(git config remote.origin.url) ]; + my $fh = popen_rd($cmd, undef, { -C => $dir, 2 => $lei->{2} }); + my $url = <$fh>; + close $fh or return; + chomp $url; + $url; +} + +sub do_fetch { + my ($cls, $lei, $cd) = @_; + my $ibx_ver; + my $curl = PublicInbox::LeiCurl->new($lei) or return; + my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); + if ($ibx_ver == 1) { + my $url = remote_url($lei, $dir) // + die "E: $dir missing remote.origin.url\n"; + my $uri = URI->new($url); + my $torsocks = $curl->torsocks($lei, $uri); + my $opt = { -C => $dir }; + my $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); + $lei->child_error($cerr, "@$cmd failed") if $cerr; + return; + } + # v2: + opendir my $dh, "$dir/git" or die "opendir $dir/git: $!"; + my @epochs = sort { $b <=> $a } map { substr($_, 0, -4) + 0 } + grep(/\A[0-9]+\.git\z/, readdir($dh)); + my ($git_url, $epoch); + for my $nr (@epochs) { # try newest epoch, first + my $edir = "$dir/git/$nr.git"; + if (defined(my $url = remote_url($lei, $edir))) { + $git_url = $url; + $epoch = $nr; + last; + } else { + warn "W: $edir missing remote.origin.url\n"; + } + } + $git_url or die "Unable to determine git URL\n"; + my $inbox_url = $git_url; + $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or + $inbox_url =~ s!/$epoch(?:\.git)?/?\z!! 
or die <<EOM; +Unable to infer inbox URL from <$git_url> +EOM + $lei->qerr("# inbox URL: $inbox_url/"); + my $muri = URI->new("$inbox_url/manifest.js.gz"); + my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', + UNLINK => 1, DIR => $dir); + my $fn = $ft->filename; + my @opt = (qw(-R -o), $fn); + my $mf = "$dir/manifest.js.gz"; + my $m0; # current manifest.js.gz contents + if (open my $fh, '<', $mf) { + $m0 = eval { + PublicInbox::LeiMirror::decode_manifest($fh, $mf, $mf) + }; + $lei->err($@) if $@; + push @opt, '-z', $mf if defined($m0); + } + my $curl_cmd = $curl->for_uri($lei, $muri, @opt); + my $opt = {}; + $opt->{$_} = $lei->{$_} for (0..2); + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt); + return $lei->child_error($cerr, "@$curl_cmd failed") if $cerr; + return if !-s $ft; # 304 Not Modified via curl -z + + my $m1 = PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + my $mdiff = { %$m1 }; + + # filter out unchanged entries + while (my ($k, $v0) = each %{$m0 // {}}) { + my $cur = $m1->{$k} // next; + my $f0 = $v0->{fingerprint} // next; + my $f1 = $cur->{fingerprint} // next; + my $t0 = $v0->{modified} // next; + my $t1 = $cur->{modified} // next; + delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1; + } + my $ibx_uri = URI->new("$inbox_url/"); + my ($path_pfx, $v1_bare, @v2_epochs) = + PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); + defined($v1_bare) and die <<EOM; +E: got v1 `$v1_bare' when expecting v2 epoch(s) in <$muri>, WTF? +EOM + my @epoch_nr = sort { $a <=> $b } + map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @v2_epochs; + + # n.b. 
this expects all epochs are from the same host + my $torsocks = $curl->torsocks($lei, $muri); + for my $nr (@epoch_nr) { + my $dir = "$dir/git/$nr.git"; + my $cmd; + my $opt = {}; + if (-d $dir) { + $opt->{-C} = $dir; + $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + } else { + my $e_uri = $ibx_uri->clone; + $e_uri->path($ibx_uri->path."git/$nr.git"); + $cmd = [ @$torsocks, + PublicInbox::LeiMirror::clone_cmd($lei, $opt), + $$e_uri, $dir ]; + } + my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); + return $lei->child_error($cerr, "@$cmd failed") if $cerr; + } + rename($fn, $mf) or die "E: rename($fn, $mf): $!\n"; + $ft->unlink_on_destroy(0); +} + +1; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index aff2bf19..6d5d3c03 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -468,6 +468,8 @@ sub x_it ($$) { $self->{pkt_op_p}->pkt_do('x_it', $code); } elsif ($self->{sock}) { # to lei(1) client send($self->{sock}, "x_it $code", MSG_EOR); + } elsif ($quit == \&CORE::exit) { # an admin command + exit($code >> 8); } # else ignore if client disconnected } @@ -511,7 +513,7 @@ sub fail ($$;$) { my ($self, $buf, $exit_code) = @_; $self->{failed}++; err($self, $buf) if defined $buf; - # calls fail_handler: + # calls fail_handler $self->{pkt_op_p}->pkt_do('!') if $self->{pkt_op_p}; x_it($self, ($exit_code // 1) << 8); undef; @@ -536,6 +538,8 @@ sub child_error { # passes non-fatal curl exit codes to user $self->{pkt_op_p}->pkt_do('child_error', $child_error); } elsif ($self->{sock}) { # to lei(1) client send($self->{sock}, "child_error $child_error", MSG_EOR); + } else { # non-lei admin command + $self->{child_error} ||= $child_error; } # else noop if client disconnected } diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 355813bd..c128d13d 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -8,6 +8,7 @@ use v5.10.1; use parent qw(PublicInbox::IPC); use IO::Uncompress::Gunzip 
qw(gunzip $GunzipError); use PublicInbox::Spawn qw(popen_rd spawn); +use File::Temp (); sub do_finish_mirror { # dwaitpid callback my ($arg, $pid) = @_; @@ -18,7 +19,9 @@ sub do_finish_mirror { # dwaitpid callback } elsif (!unlink($f)) { $lei->err("unlink($f): $!") unless $!{ENOENT}; } else { - $lei->add_external_finish($mrr->{dst}); + if ($lei->{cmd} ne 'public-inbox-clone') { + $lei->add_external_finish($mrr->{dst}); + } $lei->qerr("# mirrored $mrr->{src} => $mrr->{dst}"); } $lei->dclose; @@ -121,33 +124,38 @@ sub _try_config { sub index_cloned_inbox { my ($self, $iv) = @_; - my $ibx = delete($self->{ibx}) // { - address => [ 'lei@example.com' ], - version => $iv, - }; - $ibx->{inboxdir} = $self->{dst}; - PublicInbox::Inbox->new($ibx); - PublicInbox::InboxWritable->new($ibx); - my $opt = {}; my $lei = $self->{lei}; - for my $sw ($lei->index_opt) { - my ($k) = ($sw =~ /\A([\w-]+)/); - $opt->{$k} = $lei->{opt}->{$k}; + + # n.b. public-inbox-clone works w/o (SQLite || Xapian) + # lei is useless without Xapian + SQLite + if ($lei->{cmd} ne 'public-inbox-clone') { + my $ibx = delete($self->{ibx}) // { + address => [ 'lei@example.com' ], + version => $iv, + }; + $ibx->{inboxdir} = $self->{dst}; + PublicInbox::Inbox->new($ibx); + PublicInbox::InboxWritable->new($ibx); + my $opt = {}; + for my $sw ($lei->index_opt) { + my ($k) = ($sw =~ /\A([\w-]+)/); + $opt->{$k} = $lei->{opt}->{$k}; + } + # force synchronous dwaitpid for v2: + local $PublicInbox::DS::in_loop = 0; + my $cfg = PublicInbox::Config->new(undef, $lei->{2}); + my $env = PublicInbox::Admin::index_prepare($opt, $cfg); + local %ENV = (%ENV, %$env) if $env; + PublicInbox::Admin::progress_prepare($opt, $lei->{2}); + PublicInbox::Admin::index_inbox($ibx, undef, $opt); } - # force synchronous dwaitpid for v2: - local $PublicInbox::DS::in_loop = 0; - my $cfg = PublicInbox::Config->new(undef, $lei->{2}); - my $env = PublicInbox::Admin::index_prepare($opt, $cfg); - local %ENV = (%ENV, %$env) if $env; - 
PublicInbox::Admin::progress_prepare($opt, $lei->{2}); - PublicInbox::Admin::index_inbox($ibx, undef, $opt); open my $x, '>', "$self->{dst}/mirror.done"; # for do_finish_mirror } sub run_reap { my ($lei, $cmd, $opt) = @_; $lei->qerr("# @$cmd"); - $opt->{pgid} = 0; + $opt->{pgid} = 0 if $lei->{sock}; my $pid = spawn($cmd, undef, $opt); my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!"; @@ -205,6 +213,7 @@ sub deduce_epochs ($$) { my ($m, $path) = @_; my ($v1_bare, @v2_epochs); my $path_pfx = ''; + $path =~ s!/+\z!!; do { $v1_bare = $m->{$path}; @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m); @@ -213,6 +222,18 @@ sub deduce_epochs ($$) { ($path_pfx, $v1_bare, @v2_epochs); } +sub decode_manifest ($$$) { + my ($fh, $fn, $uri) = @_; + my $js; + my $gz = do { local $/; <$fh> } // die "slurp($fn): $!"; + gunzip(\$gz => \$js, MultiStream => 1) or + die "gunzip($uri): $GunzipError\n"; + my $m = eval { PublicInbox::Config->json->decode($js) }; + die "$uri: error decoding `$js': $@\n" if $@; + ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); + $m; +} + sub try_manifest { my ($self) = @_; my $uri = URI->new($self->{src}); @@ -221,26 +242,19 @@ sub try_manifest { my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path($path . '/manifest.js.gz'); - my $cmd = $curl->for_uri($lei, $uri); - $lei->qerr("# @$cmd"); - my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; - my ($fh, $pid) = popen_rd($cmd, undef, $opt); - my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); - my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!"; - close $fh; - waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!"; - @$reap = (); - if ($?) { - return try_scrape($self) if ($? 
>> 8) == 22; # 404 missing - return $lei->child_error($?, "@$cmd failed"); + my $pdir = $lei->rel2abs($self->{dst}); + $pdir =~ s!/[^/]+/?\z!!; + my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', + UNLINK => 1, DIR => $pdir); + my $fn = $ft->filename; + my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn); + my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} }; + my $cerr = run_reap($lei, $cmd, $opt); + if ($cerr) { + return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing + return $lei->child_error($cerr, "@$cmd failed"); } - my $js; - gunzip(\$gz => \$js, MultiStream => 1) or - die "gunzip($uri): $GunzipError"; - my $m = eval { PublicInbox::Config->json->decode($js) }; - die "$uri: error decoding `$js': $@" if $@; - ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); - + my $m = decode_manifest($ft, $fn, $uri); my ($path_pfx, $v1_bare, @v2_epochs) = deduce_epochs($m, $path); if (@v2_epochs) { # It may be possible to have v1 + v2 in parallel someday: @@ -254,6 +268,9 @@ EOM $uri->clone } @v2_epochs; clone_v2($self, \@v2_epochs); + my $fin = "$self->{dst}/manifest.js.gz"; + rename($fn, $fin) or die "E: rename($fn, $fin): $!"; + $ft->unlink_on_destroy(0); } elsif (defined $v1_bare) { clone_v1($self); } else { diff --git a/script/public-inbox-clone b/script/public-inbox-clone new file mode 100755 index 00000000..2b18969f --- /dev/null +++ b/script/public-inbox-clone @@ -0,0 +1,58 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git clone remote public-inboxes +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-clone INBOX_URL [DESTINATION] + + clone remote public-inboxes + +options: + + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be 
one of: `auto', `no' or `yes' + --verbose | -v increase verbosity (may be repeated) + --quiet | -q quiet down progress messages + -C DIR chdir to specified directory +EOF +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ + no-torsocks torsocks=s)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Admin; # loads Config +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my ($url, $dst, $extra) = @ARGV; +die $help if !defined($url) || defined($extra); +defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!); +index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'"; + +# n.b. this is still a truckload of code... +require URI; +require PublicInbox::LEI; +require PublicInbox::LeiExternal; +require PublicInbox::LeiMirror; +require PublicInbox::LeiCurl; +require PublicInbox::Lock; + +$url = PublicInbox::LeiExternal::ext_canonicalize($url); +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-clone', + 0 => *STDIN{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +open $lei->{1}, '+<&=', 1 or die "dup: $!"; +open $lei->{3}, '.' or die "open . 
$!"; +my $mrr = bless { + lei => $lei, + src => $url, + dst => $dst, +}, 'PublicInbox::LeiMirror'; +$mrr->do_mirror; +$mrr->can('do_finish_mirror')->([$mrr, $lei], $$); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-fetch b/script/public-inbox-fetch new file mode 100755 index 00000000..5d303574 --- /dev/null +++ b/script/public-inbox-fetch @@ -0,0 +1,35 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git fetch remote public-inboxes +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-fetch -C DESTINATION + + fetch remote public-inboxes + +options: + + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + --verbose | -v increase verbosity (may be repeated) + --quiet | -q quiet down progress messages + -C DIR chdir to specified directory +EOF +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ + no-torsocks torsocks=s)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Fetch; # loads Admin +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch', + 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +PublicInbox::Fetch->do_fetch($lei, '.'); diff --git a/t/lei-mirror.t b/t/lei-mirror.t index a61a7565..75e25b3f 100644 --- a/t/lei-mirror.t +++ b/t/lei-mirror.t @@ -46,6 +46,7 @@ test_lei({ tmpdir => $tmpdir }, sub { lei_ok('add-external', "$t1-pfx", '--mirror', "$http/pfx/t1/", \'--mirror v1 w/ PSGI prefix'); + ok(!-e "$t1-pfx/mirror.done", 'no leftover mirror.done'); my $d = "$home/404"; ok(!lei(qw(add-external 
--mirror), "$http/404", $d), 'mirror 404'); @@ -77,6 +78,34 @@ test_lei({ tmpdir => $tmpdir }, sub { } # for }); +SKIP: { + undef $sock; + my $d = "$tmpdir/d"; + mkdir $d or xbail "mkdir $d $!"; + my $opt = { -C => $d, 2 => \(my $err) }; + ok(!run_script([qw(-clone -q), "$http/404"], undef, $opt), '404 fails'); + ok(!-d "$d/404", 'destination not created'); + delete $opt->{2}; + + ok(run_script([qw(-clone -q -C), $d, "$http/t2"], undef, $opt), + '-clone succeeds on v2'); + ok(-d "$d/t2/git/0.git", 'epoch cloned'); + ok(-f "$d/t2/manifest.js.gz", 'manifest saved'); + ok(!-e "$d/t2/mirror.done", 'no leftover mirror.done'); + ok(run_script([qw(-fetch -q -C), "$d/t2"], undef, $opt), + '-fetch succeeds w/ manifest.js.gz'); + unlink("$d/t2/manifest.js.gz") or xbail "unlink $!"; + ok(run_script([qw(-fetch -q -C), "$d/t2"], undef, $opt), + '-fetch succeeds w/o manifest.js.gz'); + + ok(run_script([qw(-clone -q -C), $d, "$http/t1"], undef, $opt), + 'cloning v1 works'); + ok(-d "$d/t1", 'v1 cloned'); + ok(!-e "$d/t1/mirror.done", 'no leftover file'); + ok(run_script([qw(-fetch -q -C), "$d/t1"], undef, $opt), + 'fetching v1 works'); +} + ok($td->kill, 'killed -httpd'); $td->join; |