diff options
-rw-r--r-- | Documentation/lei-store-format.pod | 2 | ||||
-rw-r--r-- | lib/PublicInbox/IMAPTracker.pm | 35 | ||||
-rw-r--r-- | lib/PublicInbox/LEI.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/LeiImport.pm | 12 | ||||
-rw-r--r-- | lib/PublicInbox/LeiInput.pm | 10 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMailSync.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/NetReader.pm | 60 | ||||
-rw-r--r-- | lib/PublicInbox/URIimap.pm | 2 | ||||
-rw-r--r-- | t/lei-import-imap.t | 4 | ||||
-rw-r--r-- | t/lei-import-nntp.t | 4 |
10 files changed, 64 insertions, 69 deletions
diff --git a/Documentation/lei-store-format.pod b/Documentation/lei-store-format.pod index 3e1ddc65..71aa72cb 100644 --- a/Documentation/lei-store-format.pod +++ b/Documentation/lei-store-format.pod @@ -32,7 +32,7 @@ prevent them from being accidentally treated as a v2 inbox. ~/.local/share/lei/store - ipc.lock # lock file for internal lei IPC - local/$EPOCH.git # normal bare git repositories - - net_last.sqlite3 # import state for IMAP & NNTP + - mail_sync.sqlite3 # sync state IMAP, Maildir, NNTP Additionally, the following share the same roles they do in extindex: diff --git a/lib/PublicInbox/IMAPTracker.pm b/lib/PublicInbox/IMAPTracker.pm index fe813582..5eb33cf7 100644 --- a/lib/PublicInbox/IMAPTracker.pm +++ b/lib/PublicInbox/IMAPTracker.pm @@ -39,20 +39,12 @@ sub dbh_new ($) { $dbh; } -sub get_last ($;$) { - my ($self, $validity) = @_; - my $sth; - if (defined $validity) { - $sth = $self->{dbh}->prepare_cached(<<'', undef, 1); -SELECT uid_validity, uid FROM imap_last WHERE url = ? AND uid_validity = ? - - $sth->execute($self->{url}, $validity); - } else { - $sth = $self->{dbh}->prepare_cached(<<'', undef, 1); +sub get_last ($) { + my ($self) = @_; + my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1); SELECT uid_validity, uid FROM imap_last WHERE url = ? - $sth->execute($self->{url}); - } + $sth->execute($self->{url}); $sth->fetchrow_array; } @@ -70,19 +62,16 @@ VALUES (?, ?, ?) } sub new { - my ($class, $url, $dbname) = @_; + my ($class, $url) = @_; - unless (defined($dbname)) { - # original name for compatibility with old setups: - $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3'; + # original name for compatibility with old setups: + my $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3'; - # use the new XDG-compliant name for new setups: - if (!-f $dbname) { - $dbname = ($ENV{XDG_DATA_HOME} // - (($ENV{HOME} // '/nonexistent'). - '/.local/share')) . - '/public-inbox/imap.sqlite3'; - } + # use the new XDG-compliant name for new setups: + if (!-f $dbname) { + $dbname = ($ENV{XDG_DATA_HOME} // + (($ENV{HOME} // '/nonexistent').'/.local/share')) . + '/public-inbox/imap.sqlite3'; } if (!-f $dbname) { require File::Path; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 1ea7c9ca..52ce8ec2 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -211,7 +211,7 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s - lock=s@ in-format|F=s kw! verbose|v+ incremental! sync!), @c_opt ], + lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!), @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 26127ece..277f4f95 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -41,18 +41,13 @@ sub input_maildir_cb { # maildir_each_eml cb input_eml_cb($self, $eml, $vmd); } -sub input_imap_cb { # imap_each +sub input_net_cb { # imap_each / nntp_each my ($url, $uid, $kw, $eml, $self) = @_; my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; $vmd->{sync_info} = [ $url, $uid ] if $self->{-mail_sync}; input_eml_cb($self, $eml, $vmd); } -sub input_nntp_cb { # nntp_each - my ($url, $num, $kw, $eml, $self) = @_; - input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef); -} - sub net_merge_complete { # callback used by LeiAuth my ($self) = @_; $self->wq_io_do('process_inputs'); @@ -69,7 +64,7 @@ sub lei_import { # the main "lei import" method return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod; $self->prepare_inputs($lei, \@inputs) or return; - $self->{-mail_sync} = $lei->{opt}->{sync} // 1; + $self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1; $lei->ale; # initialize for workers to read my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; @@ -77,8 +72,7 @@ sub lei_import { # the main "lei import" method # $j = $net->net_concurrency($j); TODO if ($lei->{opt}->{incremental} // 1) { $net->{incremental} = 1; - $net->{itrk_fn} = $lei->store_path . - '/net_last.sqlite3'; + $net->{-lms_ro} = $lei->_lei_store->search->lms // 0; } } else { my $nproc = $self->detect_nproc; diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 785e607d..ce675f40 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -110,14 +110,12 @@ sub input_path_url { my $ifmt = lc($lei->{opt}->{'in-format'} // ''); # TODO auto-detect? if ($input =~ m!\Aimaps?://!i) { - $lei->{net}->imap_each($input, $self->can('input_imap_cb') // - $self->can('input_net_cb'), - $self, @args); + $lei->{net}->imap_each($input, $self->can('input_net_cb'), + $self, @args); return; } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { - $lei->{net}->nntp_each($input, $self->can('input_nntp_cb') // - $self->can('input_net_cb'), - $self, @args); + $lei->{net}->nntp_each($input, $self->can('input_net_cb'), + $self, @args); return; } elsif ($input =~ m!\Ahttps?://!i) { handle_http_input($self, $input, @args); diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index 52f26d69..2ce189fa 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -143,7 +143,7 @@ sub each_src { } sub location_stats { - my ($self, $folder, $cb, @args) = @_; + my ($self, $folder) = @_; my $dbh = $self->{dbh} //= dbh_new($self); my $fid; my $ret = {}; diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index 5978752f..81d25ead 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -235,7 +235,7 @@ sub imap_common_init ($;$) { $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; eval { require PublicInbox::IMAPClient } or die "Mail::IMAPClient is required for IMAP:\n$@\n"; - eval { require PublicInbox::IMAPTracker } or + ($lei || eval { require PublicInbox::IMAPTracker }) or die "DBD::SQLite is required for IMAP\n:$@\n"; require PublicInbox::URIimap; my $cfg = $self->{pi_cfg} // $lei->_lei_cfg; @@ -283,7 +283,7 @@ sub nntp_common_init ($;$) { $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; eval { require Net::NNTP } or die "Net::NNTP is required for NNTP:\n$@\n"; - eval { require PublicInbox::IMAPTracker } or + ($lei || eval { require PublicInbox::IMAPTracker }) or die "DBD::SQLite is required for NNTP\n:$@\n"; my $cfg = $self->{pi_cfg} // $lei->_lei_cfg; my $nn_args = {}; # scheme://authority => Net::NNTP->new arg @@ -373,17 +373,28 @@ sub run_commit_cb ($) { $cb->(@args); } -sub _itrk ($$) { - my ($self, $uri) = @_; - return unless $self->{incremental}; - # itrk_fn is set by lei - PublicInbox::IMAPTracker->new($$uri, $self->{itrk_fn}); +sub _itrk_last ($$;$) { + my ($self, $uri, $r_uidval) = @_; + return (undef, undef, $r_uidval) unless $self->{incremental}; + my ($itrk, $l_uid, $l_uidval); + if (defined(my $lms = $self->{-lms_ro})) { # LeiMailSync or 0 + $uri->uidvalidity($r_uidval) if defined $r_uidval; + my $x; + $l_uid = ($lms && ($x = $lms->location_stats($$uri))) ? + $x->{'uid.max'} : undef; + # itrk remains undef, lei/store worker writes to + # mail_sync.sqlite3 + } else { + $itrk = PublicInbox::IMAPTracker->new($$uri); + ($l_uidval, $l_uid) = $itrk->get_last($$uri); + } + ($itrk, $l_uid, $l_uidval //= $r_uidval); } sub _imap_fetch_all ($$$) { - my ($self, $mic, $uri) = @_; - my $sec = uri_section($uri); - my $mbx = $uri->mailbox; + my ($self, $mic, $orig_uri) = @_; + my $sec = uri_section($orig_uri); + my $mbx = $orig_uri->mailbox; $mic->Clear(1); # trim results history $mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!"; my ($r_uidval, $r_uidnext); @@ -393,20 +404,22 @@ sub _imap_fetch_all ($$$) { last if $r_uidval && $r_uidnext; } $r_uidval //= $mic->uidvalidity($mbx) // - return "E: $uri cannot get UIDVALIDITY"; + return "E: $orig_uri cannot get UIDVALIDITY"; $r_uidnext //= $mic->uidnext($mbx) // - return "E: $uri cannot get UIDNEXT"; - my $url = ref($uri)->new($$uri); - $url->uidvalidity($r_uidval); - $url = $$url; - my $itrk = _itrk($self, $uri); - my $l_uid; - $l_uid = $itrk->get_last($r_uidval) if $itrk; + return "E: $orig_uri cannot get UIDNEXT"; + my $uri = $orig_uri->clone; + my ($itrk, $l_uid, $l_uidval) = _itrk_last($self, $uri, $r_uidval); + return <<EOF if $l_uidval != $r_uidval; +E: $uri UIDVALIDITY mismatch +E: local=$l_uidval != remote=$r_uidval +EOF + $uri->uidvalidity($r_uidval); $l_uid //= 0; my $r_uid = $r_uidnext - 1; - if ($l_uid > $r_uid) { - return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n"; - } + return <<EOF if $l_uid > $r_uid; +E: $uri local UID exceeds remote ($l_uid > $r_uid) +E: $uri strangely, UIDVALIDLITY matches ($l_uidval) +EOF return if $l_uid >= $r_uid; # nothing to do $l_uid ||= 1; my ($mod, $shard) = @{$self->{shard_info} // []}; @@ -458,7 +471,7 @@ sub _imap_fetch_all ($$$) { # messages get deleted, so holes appear my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; - _imap_do_msg($self, $url, $uid, \$raw, + _imap_do_msg($self, $$uri, $uid, \$raw, $per_uid->{FLAGS}); $last_uid = $uid; last if $self->{quit}; @@ -547,8 +560,7 @@ sub _nntp_fetch_all ($$$) { # IMAPTracker is also used for tracking NNTP, UID == article number # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's # expensive. So we assume newsgroups don't change: - my $itrk = _itrk($self, $uri); - my (undef, $l_art) = $itrk ? $itrk->get_last : (); + my ($itrk, $l_art) = _itrk_last($self, $uri); # allow users to specify articles to refetch # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm index df9f5fd9..f6244137 100644 --- a/lib/PublicInbox/URIimap.pm +++ b/lib/PublicInbox/URIimap.pm @@ -144,4 +144,6 @@ sub scheme { sub as_string { ${$_[0]} } +sub clone { ref($_[0])->new(as_string($_[0])) } + 1; diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t index cf1fa49d..611328b4 100644 --- a/t/lei-import-imap.t +++ b/t/lei-import-imap.t @@ -45,8 +45,8 @@ test_lei({ tmpdir => $tmpdir }, sub { is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); lei_ok([qw(tag +kw:seen), $url], undef, undef); - my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; - ok(-s $f, 'net tracked for redundant imports'); + my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3"; + ok(-s $f, 'mail_sync tracked for redundant imports'); lei_ok('inspect', "blob:$out->[5]->{blob}"); my $x = json_utf8->decode($lei_out); is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect'); diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t index d795a86a..12bb002a 100644 --- a/t/lei-import-nntp.t +++ b/t/lei-import-nntp.t @@ -27,7 +27,7 @@ test_lei({ tmpdir => $tmpdir }, sub { for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); - my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; - ok(-s $f, 'net tracked for redundant imports'); + my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3"; + ok(-s $f, 'mail_sync exists tracked for redundant imports'); }); done_testing; |