user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/7] new public-inbox-{clone,fetch} commands
@ 2021-09-12  7:47  7% Eric Wong
  2021-09-12  7:47  4% ` [PATCH 4/7] clone|lei_mirror: write description in mirrors Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-09-12  7:47 UTC (permalink / raw)
  To: meta

Hopefully, these new commands make maintaining mirrors of a
single multi-epoch v2 inbox (or a handful of them) easier and
less error-prone.

Unlike grokmirror:
* these commands do not require extra config files of any kind
* they only allow cloning/fetching a single inbox per-invocation

"description" files also default to something more meaningful
for both public-inbox-init and -clone.

PATCH 7/7 also begins laying the groundwork for a v1 => v2
migration path that doesn't require existing mirrors to
redownload everything.

Eric Wong (7):
  lei_mirror: simplify error reporting
  lei_mirror: fix error message
  new public-inbox-{clone,fetch} commands
  clone|lei_mirror: write description in mirrors
  import: do not write a "description" file
  init: set a useful description
  fetch: use manifest.js.gz for v1

 Documentation/public-inbox-clone.pod |  71 +++++++++++
 Documentation/public-inbox-fetch.pod |  63 ++++++++++
 MANIFEST                             |   5 +
 lib/PublicInbox/Admin.pm             |   8 ++
 lib/PublicInbox/Fetch.pm             | 172 +++++++++++++++++++++++++++
 lib/PublicInbox/Import.pm            |   3 -
 lib/PublicInbox/LEI.pm               |   6 +-
 lib/PublicInbox/LeiMirror.pm         | 167 ++++++++++++++++----------
 lib/PublicInbox/TestCommon.pm        |   3 +-
 script/public-inbox-clone            |  58 +++++++++
 script/public-inbox-fetch            |  35 ++++++
 script/public-inbox-init             |   6 +
 t/init.t                             |   3 +
 t/lei-mirror.t                       |  52 ++++++++
 t/www_listing.t                      |   1 -
 15 files changed, 585 insertions(+), 68 deletions(-)
 create mode 100644 Documentation/public-inbox-clone.pod
 create mode 100644 Documentation/public-inbox-fetch.pod
 create mode 100644 lib/PublicInbox/Fetch.pm
 create mode 100755 script/public-inbox-clone
 create mode 100755 script/public-inbox-fetch

^ permalink raw reply	[relevance 7%]

* [PATCH 4/7] clone|lei_mirror: write description in mirrors
  2021-09-12  7:47  7% [PATCH 0/7] new public-inbox-{clone,fetch} commands Eric Wong
@ 2021-09-12  7:47  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-09-12  7:47 UTC (permalink / raw)
  To: meta

Instead of generic "Unnamed repository" or "missing" messages,
show "mirror of $URL" since it seems like a better default when
creating a mirror.
---
 lib/PublicInbox/LeiMirror.pm | 63 +++++++++++++++++++++++++-----------
 t/lei-mirror.t               | 23 +++++++++++++
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index c128d13d..fe1cefe2 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -9,6 +9,7 @@ use parent qw(PublicInbox::IPC);
 use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use PublicInbox::Spawn qw(popen_rd spawn);
 use File::Temp ();
+use Fcntl qw(SEEK_SET);
 
 sub do_finish_mirror { # dwaitpid callback
 	my ($arg, $pid) = @_;
@@ -87,6 +88,27 @@ sub clone_cmd {
 	@cmd;
 }
 
+sub _get_txt { # non-fatal
+	my ($self, $endpoint, $file) = @_;
+	my $uri = URI->new($self->{src});
+	my $lei = $self->{lei};
+	my $path = $uri->path;
+	chop($path) eq '/' or die "BUG: $uri not canonicalized";
+	$uri->path("$path/$endpoint");
+	my $cmd = $self->{curl}->for_uri($lei, $uri, '--compressed');
+	my $ce = "$self->{dst}/$file";
+	my $ft = File::Temp->new(TEMPLATE => "$file-XXXX",
+				UNLINK => 1, DIR => $self->{dst});
+	my $opt = { 0 => $lei->{0}, 1 => $ft, 2 => $lei->{2} };
+	my $cerr = run_reap($lei, $cmd, $opt);
+	return "$uri missing" if ($cerr >> 8) == 22;
+	return "# @$cmd failed (non-fatal)" if $cerr;
+	my $f = $ft->filename;
+	rename($f, $ce) or return "rename($f, $ce): $! (non-fatal)";
+	$ft->unlink_on_destroy(0);
+	undef; # success
+}
+
 # tries the relatively new /$INBOX/_/text/config/raw endpoint
 sub _try_config {
 	my ($self) = @_;
@@ -96,24 +118,10 @@ sub _try_config {
 		File::Path::mkpath($dst);
 		-d $dst or die "mkpath($dst): $!\n";
 	}
-	my $uri = URI->new($self->{src});
-	my $lei = $self->{lei};
-	my $path = $uri->path;
-	chop($path) eq '/' or die "BUG: $uri not canonicalized";
-	$uri->path($path . '/_/text/config/raw');
-	my $cmd = $self->{curl}->for_uri($lei, $uri, '--compressed');
-	my $ce = "$dst/inbox.config.example";
-	my $f = "$ce-$$.tmp";
-	open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)");
-	my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} };
-	my $cerr = run_reap($lei, $cmd, $opt);
-	if (($cerr >> 8) == 22) { # 404 missing
-		unlink($f) if -s $fh == 0;
-		return;
-	}
-	return $lei->err("# @$cmd failed (non-fatal)") if $cerr;
-	rename($f, $ce) or return $lei->err("rename($f, $ce): $! (non-fatal)");
-	my $cfg = PublicInbox::Config->git_config_dump($f, $lei->{2});
+	my $err = _get_txt($self, qw(_/text/config/raw inbox.config.example));
+	return $self->{lei}->err($err) if $err;
+	my $f = "$self->{dst}/inbox.config.example";
+	my $cfg = PublicInbox::Config->git_config_dump($f, $self->{lei}->{2});
 	my $ibx = $self->{ibx} = {};
 	for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) {
 		for (qw(address newsgroup nntpmirror)) {
@@ -122,9 +130,28 @@ sub _try_config {
 	}
 }
 
+sub set_description ($) {
+	my ($self) = @_;
+	my $f = "$self->{dst}/description";
+	open my $fh, '+>>', $f or die "open($f): $!";
+	seek($fh, 0, SEEK_SET) or die "seek($f): $!";
+	chomp(my $d = do { local $/; <$fh> } // die "read($f): $!");
+	if ($d eq '($INBOX_DIR/description missing)' ||
+			$d =~ /^Unnamed repository/ || $d !~ /\S/) {
+		seek($fh, 0, SEEK_SET) or die "seek($f): $!";
+		truncate($fh, 0) or die "truncate($f): $!";
+		print $fh "mirror of $self->{src}\n" or die "print($f): $!";
+		close $fh or die "close($f): $!";
+	}
+}
+
 sub index_cloned_inbox {
 	my ($self, $iv) = @_;
 	my $lei = $self->{lei};
+	my $err = _get_txt($self, qw(description description));
+	$lei->err($err) if $err; # non fatal
+	eval { set_description($self) };
+	warn $@ if $@;
 
 	# n.b. public-inbox-clone works w/o (SQLite || Xapian)
 	# lei is useless without Xapian + SQLite
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 75e25b3f..35b77cf7 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -2,6 +2,7 @@
 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict; use v5.10.1; use PublicInbox::TestCommon;
+use PublicInbox::Inbox;
 require_mods(qw(-httpd lei));
 my $sock = tcp_server();
 my ($tmpdir, $for_destroy) = tmpdir();
@@ -15,6 +16,8 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	my $t1 = "$home/t1-mirror";
 	lei_ok('add-external', $t1, '--mirror', "$http/t1/", \'--mirror v1');
 	ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed');
+	is(PublicInbox::Inbox::try_cat("$t1/description"),
+		"mirror of $http/t1/\n", 'description set');
 
 	lei_ok('ls-external');
 	like($lei_out, qr!\Q$t1\E!, 't1 added to ls-externals');
@@ -22,6 +25,9 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	my $t2 = "$home/t2-mirror";
 	lei_ok('add-external', $t2, '--mirror', "$http/t2/", \'--mirror v2');
 	ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed');
+	ok(-f "$t2/description", 't2 description');
+	is(PublicInbox::Inbox::try_cat("$t2/description"),
+		"mirror of $http/t2/\n", 'description set');
 
 	lei_ok('ls-external');
 	like($lei_out, qr!\Q$t2\E!, 't2 added to ls-externals');
@@ -109,4 +115,21 @@ SKIP: {
 ok($td->kill, 'killed -httpd');
 $td->join;
 
+{
+	require_ok 'PublicInbox::LeiMirror';
+	my $mrr = { src => 'https://example.com/src/', dst => $tmpdir };
+	my $exp = "mirror of https://example.com/src/\n";
+	my $f = "$tmpdir/description";
+	PublicInbox::LeiMirror::set_description($mrr);
+	is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on ENOENT');
+
+	my $fh;
+	(open($fh, '>', $f) and close($fh)) or xbail $!;
+	PublicInbox::LeiMirror::set_description($mrr);
+	is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on empty');
+	(open($fh, '>', $f) and print $fh "x\n" and close($fh)) or xbail $!;
+	is(PublicInbox::Inbox::try_cat($f), "x\n",
+		'description preserved if non-default');
+}
+
 done_testing;

^ permalink raw reply related	[relevance 4%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-09-12  7:47  7% [PATCH 0/7] new public-inbox-{clone,fetch} commands Eric Wong
2021-09-12  7:47  4% ` [PATCH 4/7] clone|lei_mirror: write description in mirrors Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).