user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/4] lei convert: fix repeat and idempotent v2 output
  2023-11-15  9:21  7% [PATCH 0/4] lei convert: support idempotent v2 outputs Eric Wong
@ 2023-11-15  9:21  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-11-15  9:21 UTC (permalink / raw)
  To: meta

We should be able to treat v2 outputs just like any other mail
format, with the exception that content dedupe is always
enforced by the v2 format.

This allows users hosting v2 public-inboxes to recover from broken
synchronization by importing from alternate archives, such as the
mbox archives hosted by https://lists.gnu.org/

Link: https://public-inbox.org/meta/20231114-hypersonic-papaya-starling-e1cfc8@nitro/
---
 lib/PublicInbox/LeiConvert.pm  |  8 ++++++--
 lib/PublicInbox/LeiOverview.pm |  4 ++--
 lib/PublicInbox/LeiToMail.pm   |  3 +--
 lib/PublicInbox/LeiXSearch.pm  |  4 ++--
 lib/PublicInbox/V2Writable.pm  |  3 ++-
 t/lei-convert.t                | 31 ++++++++++++++++++++++++++++++-
 6 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 22aba81a..4a1f8323 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -34,9 +34,13 @@ sub process_inputs { # via wq_do
 	$self->SUPER::process_inputs;
 	my $lei = $self->{lei};
 	delete $lei->{1};
-	my $l2m = delete $self->{l2m};
-	delete $self->{wcb}; # commit
+	my $l2m = delete $lei->{l2m};
 	my $nr_w = delete($l2m->{-nr_write}) // 0;
+	delete $self->{wcb}; # commit
+	if (my $v2w = delete $lei->{v2w}) {
+		$nr_w = $v2w->wq_do('done'); # may die
+		$v2w->wq_close;
+	}
 	my $d = (delete($l2m->{-nr_seen}) // 0) - $nr_w;
 	$d = $d ? " ($d duplicates)" : '';
 	$lei->qerr("# converted $nr_w messages$d");
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 129dabf8..0529bbe4 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -41,8 +41,8 @@ sub detect_fmt ($) {
 	my ($dst) = @_;
 	if ($dst =~ m!\A([:/]+://)!) {
 		die "$1 support not implemented, yet\n";
-	} elsif (!-e $dst || -d _) {
-		'maildir'; # the default TODO: MH?
+	} elsif (!-e $dst || -d _) { # maildir is the default TODO: MH
+		-e "$dst/inbox.lock" ? 'v2' : 'maildir';
 	} elsif (-f _ || -p _) {
 		die "unable to determine mbox family of $dst\n";
 	} else {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 2928be45..2d9b7061 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -375,7 +375,6 @@ sub _v2_write_cb ($$) {
 		++$self->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml, $smsg);
 		$lei->{v2w}->wq_do('add', $eml); # V2Writable->add
-		++$self->{-nr_write};
 	}
 }
 
@@ -435,7 +434,7 @@ sub new {
 			($lei->{opt}->{dedupe}//'') eq 'oid';
 		$self->{base_type} = 'v2';
 		$self->{-wq_nr_workers} = 1; # v2 has shards
-		$lei->{opt}->{save} = \1;
+		$lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
 		$dst = $lei->{ovv}->{dst} = $lei->abs_path($dst);
 		@conflict = qw(mua sort);
 	} else {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index e85fd3c4..7eda6f9e 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -391,8 +391,9 @@ sub query_done { # EOF callback for main daemon
 	($lei->{opt}->{'mail-sync'} && !$lei->{sto}) and
 		warn "BUG: {sto} missing with --mail-sync";
 	$lei->sto_done_request;
+	my $nr_w = delete($lei->{-nr_write}) // 0;
 	if (my $v2w = delete $lei->{v2w}) {
-		my $wait = $v2w->wq_do('done'); # may die
+		$nr_w = $v2w->wq_do('done'); # may die
 		$v2w->wq_close;
 	}
 	$lei->{ovv}->ovv_end($lei);
@@ -412,7 +413,6 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$?
 			delete $l2m->{mbl}; # drop dotlock
 		}
 	}
-	my $nr_w = delete($lei->{-nr_write}) // 0;
 	my $nr_dup = (delete($lei->{-nr_seen}) // 0) - $nr_w;
 	if ($lei->{-progress}) {
 		my $tot = $lei->{-mset_total} // 0;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 4d606dfe..231ed516 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -135,7 +135,7 @@ sub add {
 	if (do_idx($self, $mime, $smsg)) {
 		$self->checkpoint;
 	}
-
+	++$self->{-nr_add}; # for lei convert
 	$cmt;
 }
 
@@ -611,6 +611,7 @@ sub done {
 	$self->lock_release(!!$nbytes) if $shards;
 	$self->git->cleanup;
 	die $err if $err;
+	delete $self->{-nr_add}; # for lei-convert
 }
 
 sub importer {
diff --git a/t/lei-convert.t b/t/lei-convert.t
index 84b57f81..6aff80bb 100644
--- a/t/lei-convert.t
+++ b/t/lei-convert.t
@@ -8,7 +8,8 @@ use PublicInbox::NetReader;
 use PublicInbox::Eml;
 use IO::Uncompress::Gunzip;
 use File::Path qw(remove_tree);
-use PublicInbox::Spawn qw(which);
+use PublicInbox::Spawn qw(which run_qx);
+use File::Compare;
 use autodie qw(open);
 require_mods(qw(lei -imapd -nntpd Mail::IMAPClient Net::NNTP));
 my ($tmpdir, $for_destroy) = tmpdir;
@@ -28,8 +29,36 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	my $d = $ENV{HOME};
 	lei_ok('convert', '-o', "mboxrd:$d/foo.mboxrd",
 		"imap://$imap_host_port/t.v2.0");
+	my ($nc0) = ($lei_err =~ /converted (\d+) messages/);
 	ok(-f "$d/foo.mboxrd", 'mboxrd created from imap://');
 
+	lei_ok qw(convert -o), "v2:$d/v2-test", "mboxrd:$d/foo.mboxrd";
+	my ($nc) = ($lei_err =~ /converted (\d+) messages/);
+	is $nc, $nc0, 'converted all messages messages';
+	lei_ok qw(q z:0.. -f jsonl --only), "$d/v2-test";
+	is(scalar(split(/^/sm, $lei_out)), $nc, 'got all messages in v2-test');
+
+	lei_ok qw(convert -o), "mboxrd:$d/from-v2.mboxrd", "$d/v2-test";
+	like $lei_err, qr/converted $nc messages/;
+	is(compare("$d/foo.mboxrd", "$d/from-v2.mboxrd"), 0,
+		'convert mboxrd -> v2 ->mboxrd roundtrip') or
+			diag run_qx([qw(git diff --no-index),
+					"$d/foo.mboxrd", "$d/from-v2.mboxrd"]);
+
+	lei_ok [qw(convert -F eml -o), "$d/v2-test"], undef,
+		{ 0 => \<<'EOM', %$lei_opt };
+From: f@example.com
+To: t@example.com
+Subject: append-to-v2-on-convert
+Message-ID: <append-to-v2-on-convert@example>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+EOM
+	like $lei_err, qr/converted 1 messages/, 'only one message added';
+	lei_ok qw(q z:0.. -f jsonl --only), "$d/v2-test";
+	is(scalar(split(/^/sm, $lei_out)), $nc + 1,
+		'got expected number of messages after append convert');
+	like $lei_out, qr/append-to-v2-on-convert/;
+
 	lei_ok('convert', '-o', "mboxrd:$d/nntp.mboxrd",
 		"nntp://$nntp_host_port/t.v2");
 	ok(-f "$d/nntp.mboxrd", 'mboxrd created from nntp://');

^ permalink raw reply related	[relevance 4%]

* [PATCH 0/4] lei convert: support idempotent v2 outputs
@ 2023-11-15  9:21  7% Eric Wong
  2023-11-15  9:21  4% ` [PATCH 2/4] lei convert: fix repeat and idempotent v2 output Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-11-15  9:21 UTC (permalink / raw)
  To: meta

This may make it easier for public-inbox admins to forcibly
inject missing messages from existing mbox*/maildir/IMAP/NNTP
archives.

1/4 was only needed to get 2/4 working, but 3/4 makes
it unnecessary with our current codebase (though we may
still need 1/4 in the future).

The problem fixed by 4/4 was noticed while working on 3/4.

Eric Wong (4):
  lei: fix idempotent STDERR redirect in workers
  lei convert: fix repeat and idempotent v2 output
  lei: avoid extra fork for v2 outputs
  lei q|up|convert: common finish_output to detect errors

 lib/PublicInbox/LEI.pm         |  2 +-
 lib/PublicInbox/LeiConvert.pm  |  9 ++++++---
 lib/PublicInbox/LeiOverview.pm |  4 ++--
 lib/PublicInbox/LeiToMail.pm   | 33 +++++++++++++++++++++------------
 lib/PublicInbox/LeiXSearch.pm  | 13 +------------
 lib/PublicInbox/V2Writable.pm  |  1 -
 t/lei-convert.t                | 31 ++++++++++++++++++++++++++++++-
 7 files changed, 61 insertions(+), 32 deletions(-)

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-11-15  9:21  7% [PATCH 0/4] lei convert: support idempotent v2 outputs Eric Wong
2023-11-15  9:21  4% ` [PATCH 2/4] lei convert: fix repeat and idempotent v2 output Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).