From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <e@80x24.org> X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 611441F9F3 for <meta@public-inbox.org>; Thu, 16 Sep 2021 09:41:17 +0000 (UTC) From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 2/3] lei refresh-mail-sync: replace prune-mail-sync Date: Thu, 16 Sep 2021 09:41:15 +0000 Message-Id: <20210916094116.11457-3-e@80x24.org> In-Reply-To: <20210916094116.11457-1-e@80x24.org> References: <20210916094116.11457-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: <meta.public-inbox.org> Merely pruning mail synchronization information was insufficient for Maildir: renames are common in Maildir and we need to detect them after-the-fact when lei-daemon isn't running. Running this command could make "lei index" far more useful... 
--- MANIFEST | 2 + lib/PublicInbox/LEI.pm | 3 +- ...PruneMailSync.pm => LeiRefreshMailSync.pm} | 36 +++++++--- lib/PublicInbox/LeiStore.pm | 5 ++ t/lei-export-kw.t | 1 - t/lei-refresh-mail-sync.t | 67 +++++++++++++++++++ 6 files changed, 103 insertions(+), 11 deletions(-) rename lib/PublicInbox/{LeiPruneMailSync.pm => LeiRefreshMailSync.pm} (70%) create mode 100644 t/lei-refresh-mail-sync.t diff --git a/MANIFEST b/MANIFEST index 640eabd1..221cb992 100644 --- a/MANIFEST +++ b/MANIFEST @@ -238,6 +238,7 @@ lib/PublicInbox/LeiPmdir.pm lib/PublicInbox/LeiPruneMailSync.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiRediff.pm +lib/PublicInbox/LeiRefreshMailSync.pm lib/PublicInbox/LeiRemote.pm lib/PublicInbox/LeiRm.pm lib/PublicInbox/LeiRmWatch.pm @@ -450,6 +451,7 @@ t/lei-q-kw.t t/lei-q-remote-import.t t/lei-q-save.t t/lei-q-thread.t +t/lei-refresh-mail-sync.t t/lei-sigpipe.t t/lei-tag.t t/lei-up.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index ec103231..9794497b 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -263,7 +263,7 @@ our %CMD = ( # sorted in order of importance/use: @net_opt, @c_opt ], 'forget-mail-sync' => [ 'LOCATION...', 'forget sync information for a mail folder', @c_opt ], -'prune-mail-sync' => [ 'LOCATION...|--all', +'refresh-mail-sync' => [ 'LOCATION...|--all', 'prune dangling sync data for a mail folder', 'all:s', @c_opt ], 'export-kw' => [ 'LOCATION...|--all', 'one-time export of keywords of sync sources', @@ -616,6 +616,7 @@ sub pkt_ops { $ops->{x_it} = [ \&x_it, $lei ]; $ops->{child_error} = [ \&child_error, $lei ]; $ops->{incr} = [ \&incr, $lei ]; + $ops->{sto_done_request} = [ \&sto_done_request, $lei, $lei->{sock} ]; $ops; } diff --git a/lib/PublicInbox/LeiPruneMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm similarity index 70% rename from lib/PublicInbox/LeiPruneMailSync.pm rename to lib/PublicInbox/LeiRefreshMailSync.pm index 3678bd04..07b0aa52 100644 --- a/lib/PublicInbox/LeiPruneMailSync.pm +++ 
b/lib/PublicInbox/LeiRefreshMailSync.pm @@ -1,16 +1,20 @@ # Copyright (C) 2021 all contributors # License: AGPL-3.0+ -# "lei prune-mail-sync" drops dangling sync information -package PublicInbox::LeiPruneMailSync; +# "lei refresh-mail-sync" drops dangling sync information +# and attempts to detect moved files +package PublicInbox::LeiRefreshMailSync; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::LeiExportKw; use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::Import; sub eml_match ($$) { my ($eml, $oidbin) = @_; + $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS; $oidbin eq git_sha(length($oidbin) == 20 ? 1 : 256, $eml)->digest; } @@ -20,7 +24,7 @@ sub prune_mdir { # lms->each_src callback for my $d (@try) { my $src = "$mdir/$d/$$id"; if ($self->{verify}) { - my $eml = eml_from_path($src) or next; + my $eml = eml_from_path($src) // next; return if eml_match($eml, $oidbin); } elsif (-f $src) { return; @@ -38,12 +42,27 @@ sub prune_imap { # lms->each_src callback $self->{lei}->{sto}->ipc_do('lms_clear_src', $url, $uid); } +# detects missed file moves +sub pmdir_cb { # called via LeiPmdir->each_mdir_fn + my ($self, $f, $fl) = @_; + my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) 
or + die "BUG: $f was not from a Maildir?"; + substr($folder, 0, 0) = 'maildir:'; # add prefix + my $lms = $self->{-lms_ro} //= $self->{lei}->lms; + return if defined($lms->name_oidbin($folder, $bn)); + my $eml = eml_from_path($f) // return; + my $oidbin = $self->{lei}->git_oid($eml)->digest; + $self->{lei}->{sto}->ipc_do('lms_set_src', $oidbin, $folder, \$bn); +} + sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url my ($self, $input, @args) = @_; my $lms = $self->{-lms_ro} //= $self->{lei}->lms; if ($input =~ /\Amaildir:(.+)/i) { - my $mdir = $1; - $lms->each_src($input, \&prune_mdir, $self, $mdir); + $lms->each_src($input, \&prune_mdir, $self, my $mdir = $1); + $self->{lse} //= $self->{lei}->{sto}->search; + # call pmdir_cb (via maildir_each_file -> each_mdir_fn) + PublicInbox::LeiInput::input_path_url($self, $input); } elsif ($input =~ m!\Aimaps?://!i) { my $uri = PublicInbox::URIimap->new($input); my $mic = $self->{lei}->{net}->mic_for_folder($uri); @@ -51,10 +70,10 @@ sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url $uids = +{ map { $_ => undef } @$uids }; $lms->each_src($$uri, \&prune_imap, $self, $uids, $$uri); } else { die "BUG: $input not supported" } - my $wait = $self->{lei}->{sto}->ipc_do('done'); + $self->{lei}->{pkt_op_p}->pkt_do('sto_done_request'); } -sub lei_prune_mail_sync { +sub lei_refresh_mail_sync { my ($lei, @folders) = @_; my $sto = $lei->_lei_store or return $lei->fail(<<EOM); lei/store uninitialized, see lei-import(1) @@ -78,7 +97,6 @@ EOM $self->prepare_inputs($lei, \@folders) or return; my $j = $lei->{opt}->{jobs} || scalar(@{$self->{inputs}}) || 1; my $ops = {}; - $sto->write_prepare($lei); $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; $self->{-wq_nr_workers} = $j // 1; # locked (my $op_c, $ops) = $lei->workers_start($self, $j, $ops); @@ -89,7 +107,7 @@ EOM } no warnings 'once'; -*_complete_prune_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw; +*_complete_refresh_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw; *ipc_atfork_child = 
\&PublicInbox::LeiInput::input_only_atfork_child; *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index e8bcb04e..32f55abd 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -293,6 +293,11 @@ sub set_sync_info { _lms_rw($self)->set_src(pack('H*', $oidhex), $folder, $id); } +sub lms_set_src { + my ($self, $oidbin, $folder, $id) = @_; + _lms_rw($self)->set_src($oidbin, $folder, $id); +} + sub _remove_if_local { # git->cat_async arg my ($bref, $oidhex, $type, $size, $self) = @_; $self->{im}->remove($bref) if $bref; diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t index 9531949a..1fe940bb 100644 --- a/t/lei-export-kw.t +++ b/t/lei-export-kw.t @@ -6,7 +6,6 @@ use File::Copy qw(cp); use File::Path qw(make_path); require_mods(qw(lei -imapd Mail::IMAPClient)); my ($tmpdir, $for_destroy) = tmpdir; -my ($ro_home, $cfg_path) = setup_public_inboxes; my $expect = eml_load('t/data/0001.patch'); test_lei({ tmpdir => $tmpdir }, sub { my $home = $ENV{HOME}; diff --git a/t/lei-refresh-mail-sync.t b/t/lei-refresh-mail-sync.t new file mode 100644 index 00000000..ff558277 --- /dev/null +++ b/t/lei-refresh-mail-sync.t @@ -0,0 +1,67 @@ +#!perl -w +# Copyright (C) all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +require_mods(qw(lei)); + +my $stop_daemon = sub { # needed since we don't have inotify + lei_ok qw(daemon-pid); + chomp(my $pid = $lei_out); + $pid > 0 or xbail "bad pid: $pid"; + kill('TERM', $pid) or xbail "kill: $!"; + for (0..10) { + tick; + kill(0, $pid) or last; + } + kill(0, $pid) and xbail "daemon still running (PID:$pid)"; +}; + +test_lei({ daemon_only => 1 }, sub { + my $d = "$ENV{HOME}/d"; + my ($ro_home, $cfg_path) = setup_public_inboxes; + lei_ok qw(daemon-pid); + lei_ok qw(add-external), "$ro_home/t2"; + lei_ok qw(q mid:testmessage@example.com -o), "Maildir:$d"; + my (@o) = glob("$d/*/*"); + 
scalar(@o) == 1 or xbail('multiple results', \@o); + my ($bn0) = ($o[0] =~ m!/([^/]+)\z!); + + my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422'; + lei_ok 'inspect', "blob:$oid"; + my $before = json_utf8->decode($lei_out); + my $exp0 = { 'mail-sync' => { "maildir:$d" => [ $bn0 ] } }; + is_deeply($before, $exp0, 'inspect shows expected'); + + $stop_daemon->(); + my $dst = $o[0]; + $dst =~ s/:2,.*\z// and $dst =~ s!/cur/!/new/! and + rename($o[0], $dst) or xbail "rename($o[0] => $dst): $!"; + + lei_ok 'inspect', "blob:$oid"; + is_deeply(json_utf8->decode($lei_out), + $before, 'inspect unchanged immediately after restart'); + lei_ok 'refresh-mail-sync', '--all'; + lei_ok 'inspect', "blob:$oid"; + my ($bn1) = ($dst =~ m!/([^/]+)\z!); + my $exp1 = { 'mail-sync' => { "maildir:$d" => [ $bn1 ] } }; + is_deeply(json_utf8->decode($lei_out), $exp1, + 'refresh-mail-sync updated location'); + + $stop_daemon->(); + rename($dst, "$d/unwatched") or xbail "rename $dst out-of-the-way $!"; + + lei_ok 'refresh-mail-sync', $d; + lei_ok 'inspect', "blob:$oid"; + is($lei_out, '{}', 'no known locations after "removal"'); + lei_ok 'refresh-mail-sync', "Maildir:$d"; + + $stop_daemon->(); + rename("$d/unwatched", $dst) or xbail "rename $dst back"; + + lei_ok 'refresh-mail-sync', "Maildir:$d"; + lei_ok 'inspect', "blob:$oid"; + is_deeply(json_utf8->decode($lei_out), $exp1, + 'replaced file noted again'); +}); + +done_testing;