From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B70FA1FB03 for ; Mon, 28 Nov 2022 05:32:36 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1669613556; bh=w2UBZWVJgxwbLvk9eWP8i4rjDyh6c2viuNCxNFoPng4=; h=From:To:Subject:Date:In-Reply-To:References:From; b=XnT7oFitj2XMhBdoLj98zLBd/bap88jR+enZELg8R91QQC5sl4rRJMuV8as9M55DS 4ApsLLcLtUh6vGscHP2vC8GDMedxdZJl/9PHubmYF1Npw64zRrpY+JjpE+k7PhXSgr DTL/W77p74Vvmw+mWCDGFOWws189O716A3/t7cuk= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 20/95] clone: support --dry-run / -n flag Date: Mon, 28 Nov 2022 05:31:17 +0000 Message-Id: <20221128053232.291618-21-e@80x24.org> In-Reply-To: <20221128053232.291618-1-e@80x24.org> References: <20221128053232.291618-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: It still makes HTTP(S) requests to retrieve the manifest or scrape HTML, but doesn't make permanent changes to the FS (aside from modifying {acm}time of ${TMPDIR-/tmp}). --- Documentation/public-inbox-clone.pod | 6 +++++ lib/PublicInbox/LeiMirror.pm | 35 ++++++++++++++-------------- script/public-inbox-clone | 6 ++++- t/www_listing.t | 6 +++++ 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod index 7e95146e..178d952a 100644 --- a/Documentation/public-inbox-clone.pod +++ b/Documentation/public-inbox-clone.pod @@ -65,6 +65,12 @@ When cloning a top-level with multiple inboxes, ignore inboxes and repositories matching the given wildcard pattern. Supports the same wildcards as L +=item -n + +=item --dry-run + +Show what would be done, without making any changes. + =item -q =item --quiet diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 5e1b1c64..d955ac3b 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -27,10 +27,10 @@ sub _wq_done_wait { # dwaitpid callback (via wq_eof) my $f = "$mrr->{dst}/mirror.done"; if ($?) { $lei->child_error($?); - } elsif (!unlink($f)) { + } elsif (!$mrr->{dry_run} && !unlink($f)) { warn("unlink($f): $!\n") unless $!{ENOENT}; } else { - if ($lei->{cmd} ne 'public-inbox-clone') { + if (!$mrr->{dry_run} && $lei->{cmd} ne 'public-inbox-clone') { # calls _finish_add_external $lei->lazy_cb('add-external', '_finish_' )->($lei, $mrr->{dst}); @@ -107,7 +107,8 @@ sub ft_rename ($$$) { my @st = stat($dst); my $mode = @st ? ($st[2] & 07777) : ($open_mode & ~umask); chmod($mode, $ft) or croak "E: chmod $fn: $!"; - rename($fn, $dst) or croak "E: rename($fn => $ft): $!"; + require File::Copy; + File::Copy::mv($fn, $dst) or croak "E: mv($fn => $ft): $!"; $ft->unlink_on_destroy(0); } @@ -123,10 +124,11 @@ sub _get_txt_start { # non-fatal my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} }; my $cmd = $self->{curl}->for_uri($lei, $uri, qw(--compressed -R -o), $ft->filename); - $self->{"-get_txt.$endpoint"} = [ $ft, $cmd, $uri ]; my $jobs = $lei->{opt}->{jobs} // 1; reap_live() while keys(%LIVE) >= $jobs; $lei->qerr("# @$cmd"); + return if $self->{dry_run}; + $self->{"-get_txt.$endpoint"} = [ $ft, $cmd, $uri ]; $LIVE{spawn($cmd, undef, $opt)} = [ \&_get_txt_done, $self, $endpoint, $fini ]; } @@ -236,6 +238,7 @@ sub start_clone { my $jobs = $self->{lei}->{opt}->{jobs} // 1; reap_live() while keys(%LIVE) >= $jobs; $self->{lei}->qerr("# @$cmd"); + return if $self->{dry_run}; $LIVE{spawn($cmd, undef, $opt)} = [ \&reap_clone, $self, $cmd, $fini ]; } @@ -339,6 +342,7 @@ sub reap_clone { # async, called via SIGCHLD sub v1_done { # called via OnDestroy my ($self) = @_; + return if $self->{dry_run}; _write_inbox_config($self); my $dst = $self->{cur_dst} // $self->{dst}; if (defined(my $o = $self->{-ent} ? $self->{-ent}->{owner} : undef)) { @@ -350,6 +354,7 @@ sub v1_done { # called via OnDestroy sub v2_done { # called via OnDestroy my ($self) = @_; + return if $self->{dry_run}; _write_inbox_config($self); require PublicInbox::MultiGit; my $dst = $self->{cur_dst} // $self->{dst}; @@ -413,7 +418,8 @@ failed to extract epoch number from $src # filter out the epochs we skipped $self->{-culled_manifest} = 1 if delete(@$m{@skip}); - -d $dst || File::Path::mkpath($dst); + (!$self->{dry_run} && !-d $dst) and File::Path::mkpath($dst); + require PublicInbox::Lock; my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; my $fini = PublicInbox::OnDestroy->new($$, \&v2_done, $task); @@ -421,7 +427,7 @@ failed to extract epoch number from $src _get_txt_start($task, '_/text/config/raw', $fini); _get_txt_start($self, 'description', $fini); - $task->{-locked} = $lk->lock_for_scope($$); + $task->{-locked} = $lk->lock_for_scope($$) if !$self->{dry_run}; my @cmd = clone_cmd($lei, my $opt = {}); while (@src_edst && !$lei->{child_error}) { my $cmd = [ @$pfx, @cmd, splice(@src_edst, 0, 2) ]; @@ -507,17 +513,12 @@ sub try_manifest { my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path($path . '/manifest.js.gz'); - my $pdir = $lei->rel2abs($self->{dst}); - $pdir =~ s!/[^/]+/?\z!!; - -d $pdir || File::Path::mkpath($pdir); - my $ft = File::Temp->new(TEMPLATE => 'm-XXXX', - UNLINK => 1, DIR => $pdir, SUFFIX => '.tmp'); + my $ft = File::Temp->new(TEMPLATE => '.manifest-XXXX', + UNLINK => 1, TMPDIR => 1, SUFFIX => '.tmp'); my $fn = $ft->filename; - my ($bn) = ($fn =~ m!/([^/]+)\z!); - my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $bn); - my $opt = { -C => $pdir }; - $opt->{$_} = $lei->{$_} for (0..2); - my $cerr = run_reap($lei, $cmd, $opt); + my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn); + my %opt = map { $_ => $lei->{$_} } (0..2); + my $cerr = run_reap($lei, $cmd, \%opt); local %LIVE; if ($cerr) { return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing @@ -579,7 +580,7 @@ EOM } } reap_live() while keys(%LIVE); - return if $self->{lei}->{child_error}; + return if $self->{lei}->{child_error} || $self->{dry_run}; if (delete $self->{-culled_manifest}) { # set by clone_v2/-I/--exclude # write the smaller manifest if epochs were skipped so diff --git a/script/public-inbox-clone b/script/public-inbox-clone index ce4697f3..22ffc0fc 100755 --- a/script/public-inbox-clone +++ b/script/public-inbox-clone @@ -17,12 +17,13 @@ options: --torsocks VAL whether or not to wrap git and curl commands with torsocks (default: `auto') Must be one of: `auto', `no' or `yes' + --dry-run | -n show what would be cloned without cloning --verbose | -v increase verbosity (may be repeated) --quiet | -q increase verbosity (may be repeated) -C DIR chdir to specified directory EOF GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ - jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; + dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; if ($opt->{help}) { print $help; exit }; require PublicInbox::Admin; # loads Config PublicInbox::Admin::do_chdir(delete $opt->{C}); @@ -54,6 +55,9 @@ my $mrr = bless { src => $url, dst => $dst, }, 'PublicInbox::LeiMirror'; + +$? = 0; +$mrr->{dry_run} = 1 if $lei->{opt}->{'dry-run'}; $mrr->do_mirror; $mrr->can('_wq_done_wait')->([$mrr, $lei], $$); exit(($lei->{child_error} // 0) >> 8); diff --git a/t/www_listing.t b/t/www_listing.t index c13d8f90..45287c7d 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -153,6 +153,12 @@ EOM is(xqx([qw(git config -f), "$tmpdir/incl/alt/config", 'gitweb.owner']), "lorelei \xc4\x80\n", 'gitweb.owner set by -clone'); + $clone_err = ''; + ok(run_script(['-clone', '--dry-run', + "http://$host:$port/pfx", "$tmpdir/dry-run" ], + undef, $opt), 'clone --dry-run') or diag "clone_err=$clone_err"; + ok(!-d "$tmpdir/dry-run", 'nothing cloned with --dry-run'); + undef $td; open $mh, '<', "$tmpdir/incl/manifest.js.gz" or xbail "open: $!";