user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/5] lei prune-mail-sync: new command to prune invalid sync data
Date: Wed,  9 Jun 2021 07:47:51 +0000	[thread overview]
Message-ID: <20210609074751.29217-6-e@80x24.org> (raw)
In-Reply-To: <20210609074751.29217-1-e@80x24.org>

This will be invoked automatically by "lei import" eventually,
but it may make sense to expose it as a separate command.
---
 MANIFEST                            |  1 +
 lib/PublicInbox/LEI.pm              |  2 +
 lib/PublicInbox/LeiPruneMailSync.pm | 97 +++++++++++++++++++++++++++++
 lib/PublicInbox/NetReader.pm        | 19 ++++++
 lib/PublicInbox/NetWriter.pm        | 21 +------
 5 files changed, 121 insertions(+), 19 deletions(-)
 create mode 100644 lib/PublicInbox/LeiPruneMailSync.pm

diff --git a/MANIFEST b/MANIFEST
index 7bdbf252..3d4c6cbd 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -222,6 +222,7 @@ lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
 lib/PublicInbox/LeiPmdir.pm
+lib/PublicInbox/LeiPruneMailSync.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRemote.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 77fc5b8f..265b7047 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -245,6 +245,8 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt(), @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
+'prune-mail-sync' => [ 'LOCATION...|--all',
+	'prune dangling sync data for a mail folder', 'all:s', @c_opt ],
 'export-kw' => [ 'LOCATION...|--all',
 	'one-time export of keywords of sync sources',
 	qw(all:s mode=s), @c_opt ],
diff --git a/lib/PublicInbox/LeiPruneMailSync.pm b/lib/PublicInbox/LeiPruneMailSync.pm
new file mode 100644
index 00000000..79f3325d
--- /dev/null
+++ b/lib/PublicInbox/LeiPruneMailSync.pm
@@ -0,0 +1,121 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei prune-mail-sync" drops dangling sync information
+package PublicInbox::LeiPruneMailSync;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::LeiExportKw;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::ContentHash qw(git_sha); # for eml_match
+
+# returns true if the digest of $eml equals the binary object ID $oidbin;
+# a 20-byte $oidbin is hashed with git_sha(1, ...), any other length with 256
+sub eml_match ($$) {
+	my ($eml, $oidbin) = @_;
+	$oidbin eq git_sha(length($oidbin) == 20 ? 1 : 256, $eml)->digest;
+}
+
+# Maildir callback: $id is a reference to a message basename under $mdir.
+# The sync entry is kept if the file still exists in "cur" or "new" (and,
+# when $self->{verify} is set, still matches $oidbin); otherwise it is
+# reported via qerr and cleared with the lms_clear_src store call.
+sub prune_mdir { # lms->each_src callback
+	my ($oidbin, $id, $self, $mdir) = @_;
+	# names ending in a ":2,<letters>" suffix are tried in "cur" first
+	my @try = $$id =~ /:2,[a-zA-Z]*\z/ ? qw(cur new) : qw(new cur);
+	for my $d (@try) {
+		my $src = "$mdir/$d/$$id";
+		# NOTE(review): nothing in this file sets {verify} -- confirm
+		# where (or whether) it is meant to be enabled
+		if ($self->{verify}) {
+			my $eml = eml_from_path($src) or next;
+			return if eml_match($eml, $oidbin);
+		} elsif (-f $src) {
+			return;
+		}
+	}
+	# both tries failed
+	$self->{lei}->qerr("# maildir:$mdir $$id gone");
+	$self->{lei}->{sto}->ipc_do('lms_clear_src', "maildir:$mdir", $id);
+}
+
+# IMAP callback: $uids is a hashref keyed by the UIDs currently in the
+# folder; an entry whose $uid is absent is reported and cleared.
+sub prune_imap { # lms->each_src callback
+	my ($oidbin, $uid, $self, $uids, $url) = @_;
+	return if exists $uids->{$uid};
+	$self->{lei}->qerr("# $url $uid gone");
+	$self->{lei}->{sto}->ipc_do('lms_clear_src', $url, $uid);
+}
+
+# Prunes one sync source.  $input is "maildir:PATH" or an imap(s):// URL,
+# anything else is a BUG.  Dies if the IMAP folder cannot be opened.
+sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
+	my ($self, $input, @args) = @_;
+	my $lms = $self->{-lms_ro} //= $self->{lse}->lms;
+	if ($input =~ /\Amaildir:(.+)/i) {
+		my $mdir = $1;
+		$lms->each_src($input, \&prune_mdir, $self, $mdir);
+	} elsif ($input =~ m!\Aimaps?://!i) {
+		my $uri = PublicInbox::URIimap->new($input);
+		# mic_for_folder returns false when the folder can't be
+		# opened; fail clearly instead of calling ->search on undef
+		my $mic = $self->{lei}->{net}->mic_for_folder($uri) or
+			die "E: failed to open <$input>: $@";
+		my $uids = $mic->search('UID 1:*');
+		$uids = +{ map { $_ => undef } @$uids };
+		$lms->each_src($$uri, \&prune_imap, $self, $uids, $$uri);
+	} else { die "BUG: $input not supported" }
+	# assigned so ipc_do runs in scalar context; presumably this waits
+	# for lei/store to finish -- confirm against PublicInbox::IPC
+	my $wait = $self->{lei}->{sto}->ipc_do('done');
+}
+
+# "lei prune-mail-sync" entry point: resolves @folders (or --all) via lms
+# and starts workers for the prepared inputs (input_path_url is presumably
+# invoked once per input by PublicInbox::LeiInput -- confirm)
+sub lei_prune_mail_sync {
+	my ($lei, @folders) = @_;
+	my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
+lei/store uninitialized, see lei-import(1)
+EOM
+	my $lse = $sto->search;
+	my $lms = $lse->lms or return $lei->fail(<<EOM);
+lei mail_sync uninitialized, see lei-import(1)
+EOM
+	if (defined(my $all = $lei->{opt}->{all})) {
+		$lms->group2folders($lei, $all, \@folders) or return;
+	} else {
+		my $err = $lms->arg2folder($lei, \@folders);
+		$lei->qerr(@{$err->{qerr}}) if $err->{qerr};
+		return $lei->fail($err->{fail}) if $err->{fail};
+	}
+	delete $lms->{dbh};
+	$sto->write_prepare($lei);
+	my $self = bless { lse => $lse }, __PACKAGE__;
+	$lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
+	$self->prepare_inputs($lei, \@folders) or return;
+	my $j = $lei->{opt}->{jobs} || scalar(@{$self->{inputs}}) || 1;
+	undef $lms; # for fork
+	my $ops = {};
+	# NOTE(review): write_prepare was already called above; this second
+	# call looks redundant -- confirm before removing
+	$sto->write_prepare($lei);
+	$lei->{auth}->op_merge($ops, $self) if $lei->{auth};
+	$self->{-wq_nr_workers} = $j // 1; # locked
+	(my $op_c, $ops) = $lei->workers_start($self, $j, $ops);
+	$lei->{wq1} = $self;
+	$lei->{-err_type} = 'non-fatal';
+	net_merge_all_done($self) unless $lei->{auth};
+	$lei->wait_wq_events($op_c, $ops); # net_merge_all_done if !{auth}
+}
+
+no warnings 'once';
+*_complete_prune_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
+*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+
+1;
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 058f4313..2795a9d4 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -747,4 +747,23 @@ sub nntp_each {
 
 sub new { bless {}, shift };
 
+# opens the mailbox of $uri and updates $uri with UIDVALIDITY; returns $mic
+sub mic_for_folder {
+	my ($self, $uri) = @_;
+	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
+	my $m = $self->isa('PublicInbox::NetWriter') ? 'select' : 'examine';
+	$mic->$m($uri->mailbox) or return; # false return: caller must check
+	my $uidval; # taken from the response lines if present, else queried
+	for ($mic->Results) {
+		/^\* OK \[UIDVALIDITY ([0-9]+)\].*/ or next;
+		$uidval = $1;
+		last;
+	}
+	$uidval //= $mic->uidvalidity($uri->mailbox) or
+		die "E: failed to get uidvalidity from <$uri>: $@";
+	$uri->uidvalidity($uidval);
+	$mic;
+}
+
+
 1;
diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm
index 8ec7f85c..82288e6b 100644
--- a/lib/PublicInbox/NetWriter.pm
+++ b/lib/PublicInbox/NetWriter.pm
@@ -26,26 +26,9 @@ sub imap_append {
 		die "APPEND $folder: $@";
 }
 
-# updates $uri with UIDVALIDITY
-sub mic_for_folder {
-	my ($self, $uri) = @_;
-	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
-	$mic->select($uri->mailbox) or return;
-	my $uidval;
-	for ($mic->Results) {
-		/^\* OK \[UIDVALIDITY ([0-9]+)\].*/ or next;
-		$uidval = $1;
-		last;
-	}
-	$uidval //= $mic->uidvalidity($uri->mailbox) or
-		die "E: failed to get uidvalidity from <$uri>: $@";
-	$uri->uidvalidity($uidval);
-	$mic;
-}
-
 sub imap_delete_all {
 	my ($self, $uri) = @_;
-	my $mic = mic_for_folder($self, $uri) or return;
+	my $mic = $self->mic_for_folder($uri) or return;
 	my $sec = $self->can('uri_section')->($uri);
 	local $0 = $uri->mailbox." $sec";
 	if ($mic->delete_message('1:*')) {
@@ -55,7 +38,7 @@ sub imap_delete_all {
 
 sub imap_delete_1 {
 	my ($self, $uri, $uid, $delete_mic) = @_;
-	$$delete_mic //= mic_for_folder($self, $uri) or return;
+	$$delete_mic //= $self->mic_for_folder($uri) or return;
 	$$delete_mic->delete_message($uid);
 }
 

      parent reply	other threads:[~2021-06-09  7:47 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-09  7:47 [PATCH 0/5] lei Maildir stuff Eric Wong
2021-06-09  7:47 ` [PATCH 1/5] inbox_writable: fix import_maildir Eric Wong
2021-06-09  7:47 ` [PATCH 2/5] mdir_reader: maildir_each_file: pass flags, skip Trash Eric Wong
2021-06-09  7:47 ` [PATCH 3/5] lei tag: parallelize Maildir access Eric Wong
2021-06-09  7:47 ` [PATCH 4/5] lei_mail_sync: hoist out --all handling from export-kw Eric Wong
2021-06-09  7:47 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210609074751.29217-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).