From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 304562018A for ; Mon, 28 Nov 2022 05:32:40 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1669613560; bh=X4e5EUJFnbuCWn4QS18zivZGHUUOW9xu8/KkZyJYolQ=; h=From:To:Subject:Date:In-Reply-To:References:From; b=1GwWKgwnu4U49OzNcBGR1jYQxDLbCC/S1Ycli5eTNqAbgnEUxt4GfaX1lAiIKj/3A Gz/HHiuh8B3uAkRr61wccx7HZ+FJeyM/ltwSjJlcB0bksgsUllGlvk/GPsnVFhNcUL pZ/lzwxFeMFCYZimuiqecy49Wsn6lYSC3pVBUgqY= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 36/95] clone: support --inbox-version Date: Mon, 28 Nov 2022 05:31:33 +0000 Message-Id: <20221128053232.291618-37-e@80x24.org> In-Reply-To: <20221128053232.291618-1-e@80x24.org> References: <20221128053232.291618-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is part of `lei add-external --mirror', and it makes sense to have for development and testing. We'll also add a fallback in case somebody tries --inbox-version and fails due to newer remote instances of public-inbox. --- Documentation/lei-add-external.pod | 4 +++- Documentation/public-inbox-clone.pod | 6 ++++++ lib/PublicInbox/LeiMirror.pm | 31 +++++++++++++++++----------- script/public-inbox-clone | 2 +- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/Documentation/lei-add-external.pod b/Documentation/lei-add-external.pod index 7afcad63..2a131b55 100644 --- a/Documentation/lei-add-external.pod +++ b/Documentation/lei-add-external.pod @@ -75,7 +75,9 @@ Default: C =item --inbox-version=NUM -Force a public-inbox version (must be C<1> or C<2>). 
+Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. =back diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod index 52c89cfd..1c31fbb3 100644 --- a/Documentation/public-inbox-clone.pod +++ b/Documentation/public-inbox-clone.pod @@ -76,6 +76,12 @@ no v1 inboxes are present. Default: C +=item --inbox-version=NUM + +Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. + =item -n =item --dry-run diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 18c825d3..c3512d43 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -43,7 +43,7 @@ sub _wq_done_wait { # dwaitpid callback (via wq_eof) # for old installations without manifest.js.gz sub try_scrape { - my ($self) = @_; + my ($self, $fallback_manifest) = @_; my $uri = URI->new($self->{src}); my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; @@ -54,9 +54,17 @@ sub try_scrape { close($fh) or return $lei->child_error($?, "@$cmd failed"); # we grep with URL below, we don't want Subject/From headers - # making us clone random URLs + # making us clone random URLs. This assumes remote instances + # prior to public-inbox 1.7.0 + # 5b96edcb1e0d8252 (www: move mirror instructions to /text/, 2021-08-28) my @html = split(/<hr>/, $html); my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); + if (!@urls && $fallback_manifest) { + warn <as_string; chop($url) eq '/' or die "BUG: $uri not canonicalized"; @@ -603,7 +611,6 @@ sub try_manifest { my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn); my %opt = map { $_ => $lei->{$_} } (0..2); my $cerr = run_reap($lei, $cmd, \%opt); - local $LIVE; if ($cerr) { return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing return $lei->child_error($cerr, "@$cmd failed"); @@ -698,15 +705,15 @@ sub do_mirror { # via wq_io_do or public-inbox-clone $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<""; --inbox-config must be one of `always', `v2', `v1', or `never' - my $iv = $lei->{opt}->{'inbox-version'}; - if (defined $iv) { - local $LIVE; - return clone_v1($self) if $iv == 1; - return try_scrape($self) if $iv == 2; - die "bad --inbox-version=$iv\n"; - } - return start_clone_url($self) if $self->{src} =~ m!://!; - die "TODO: cloning local directories not supported, yet"; + local $LIVE; + my $iv = $lei->{opt}->{'inbox-version'} // + return start_clone_url($self); + return clone_v1($self) if $iv == 1; + die "bad --inbox-version=$iv\n" if $iv != 2; + die <<EOM if $self->{src} !~ m!://!; +cloning local v2 inboxes not supported +EOM + try_scrape($self, 1); }; $lei->fail($@) if $@; } diff --git a/script/public-inbox-clone b/script/public-inbox-clone index 3d980c97..2900f232 100755 --- a/script/public-inbox-clone +++ b/script/public-inbox-clone @@ -23,7 +23,7 @@ options: -C DIR chdir to specified directory EOF GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ - inbox-config=s + inbox-config=s inbox-version=i dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; if ($opt->{help}) { print $help; exit }; require PublicInbox::Admin; # loads Config