user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/4] lei import: avoid IMAPTracker, use LeiMailSync more
  @ 2021-04-29  9:46  3% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2021-04-29  9:46 UTC (permalink / raw)
  To: meta

IMAPTracker has a UNIQUE constraint on the `url' column,
which may cause compatibility and/or rollback problems
in attempting to deal with UIDVALIDITY changes.

Having multiple sources of truth leads to confusion and bugs,
so relying on LeiMailSync exclusively ought to simplify things.

Furthermore, since LeiMailSync is only written to by LeiStore,
it is safer in that it won't mark a UID or article as imported
until git-fast-import has seen it, and the SQLite commit always
happens after "done\n" is sent to fast-import.

This mostly reverts recent commits to IMAPTracker to support
lei, those are:

1) commit 7632d8f7590daf70c65d4270e750c36552fa9389
   ("net_reader: restart on first UID when UIDVALIDITY changes")
2) commit 311a5d37ad275cd75b1e64d87827c4d13fe4bfab
   ("imap_tracker: prepare for use with lei").

This means public-inbox-watch will not change between 1.6 and
1.7: -watch stops synching a folder when UIDVALIDITY changes.
---
 Documentation/lei-store-format.pod |  2 +-
 lib/PublicInbox/IMAPTracker.pm     | 35 ++++++-----------
 lib/PublicInbox/LEI.pm             |  2 +-
 lib/PublicInbox/LeiImport.pm       | 12 ++----
 lib/PublicInbox/LeiInput.pm        | 10 ++---
 lib/PublicInbox/LeiMailSync.pm     |  2 +-
 lib/PublicInbox/NetReader.pm       | 60 ++++++++++++++++++------------
 lib/PublicInbox/URIimap.pm         |  2 +
 t/lei-import-imap.t                |  4 +-
 t/lei-import-nntp.t                |  4 +-
 10 files changed, 64 insertions(+), 69 deletions(-)

diff --git a/Documentation/lei-store-format.pod b/Documentation/lei-store-format.pod
index 3e1ddc65..71aa72cb 100644
--- a/Documentation/lei-store-format.pod
+++ b/Documentation/lei-store-format.pod
@@ -32,7 +32,7 @@ prevent them from being accidentally treated as a v2 inbox.
   ~/.local/share/lei/store
   - ipc.lock                        # lock file for internal lei IPC
   - local/$EPOCH.git                # normal bare git repositories
-  - net_last.sqlite3                # import state for IMAP & NNTP
+  - mail_sync.sqlite3               # sync state IMAP, Maildir, NNTP
 
 Additionally, the following share the same roles they do in extindex:
 
diff --git a/lib/PublicInbox/IMAPTracker.pm b/lib/PublicInbox/IMAPTracker.pm
index fe813582..5eb33cf7 100644
--- a/lib/PublicInbox/IMAPTracker.pm
+++ b/lib/PublicInbox/IMAPTracker.pm
@@ -39,20 +39,12 @@ sub dbh_new ($) {
 	$dbh;
 }
 
-sub get_last ($;$) {
-	my ($self, $validity) = @_;
-	my $sth;
-	if (defined $validity) {
-		$sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
-SELECT uid_validity, uid FROM imap_last WHERE url = ? AND uid_validity = ?
-
-		$sth->execute($self->{url}, $validity);
-	} else {
-		$sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+sub get_last ($) {
+	my ($self) = @_;
+	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
 SELECT uid_validity, uid FROM imap_last WHERE url = ?
 
-		$sth->execute($self->{url});
-	}
+	$sth->execute($self->{url});
 	$sth->fetchrow_array;
 }
 
@@ -70,19 +62,16 @@ VALUES (?, ?, ?)
 }
 
 sub new {
-	my ($class, $url, $dbname) = @_;
+	my ($class, $url) = @_;
 
-	unless (defined($dbname)) {
-		# original name for compatibility with old setups:
-		$dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3';
+	# original name for compatibility with old setups:
+	my $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3';
 
-		# use the new XDG-compliant name for new setups:
-		if (!-f $dbname) {
-			$dbname = ($ENV{XDG_DATA_HOME} //
-					(($ENV{HOME} // '/nonexistent').
-					 '/.local/share')) .
-				'/public-inbox/imap.sqlite3';
-		}
+	# use the new XDG-compliant name for new setups:
+	if (!-f $dbname) {
+		$dbname = ($ENV{XDG_DATA_HOME} //
+			(($ENV{HOME} // '/nonexistent').'/.local/share')) .
+			'/public-inbox/imap.sqlite3';
 	}
 	if (!-f $dbname) {
 		require File::Path;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 1ea7c9ca..52ce8ec2 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -211,7 +211,7 @@ our %CMD = ( # sorted in order of importance/use:
 'import' => [ 'LOCATION...|--stdin',
 	'one-time import/update from URL or filesystem',
 	qw(stdin| offset=i recursive|r exclude=s include|I=s
-	lock=s@ in-format|F=s kw! verbose|v+ incremental! sync!), @c_opt ],
+	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!), @c_opt ],
 'convert' => [ 'LOCATION...|--stdin',
 	'one-time conversion from URL or filesystem to another format',
 	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 26127ece..277f4f95 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -41,18 +41,13 @@ sub input_maildir_cb { # maildir_each_eml cb
 	input_eml_cb($self, $eml, $vmd);
 }
 
-sub input_imap_cb { # imap_each
+sub input_net_cb { # imap_each / nntp_each
 	my ($url, $uid, $kw, $eml, $self) = @_;
 	my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
 	$vmd->{sync_info} = [ $url, $uid ] if $self->{-mail_sync};
 	input_eml_cb($self, $eml, $vmd);
 }
 
-sub input_nntp_cb { # nntp_each
-	my ($url, $num, $kw, $eml, $self) = @_;
-	input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef);
-}
-
 sub net_merge_complete { # callback used by LeiAuth
 	my ($self) = @_;
 	$self->wq_io_do('process_inputs');
@@ -69,7 +64,7 @@ sub lei_import { # the main "lei import" method
 	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
 	$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
 	$self->prepare_inputs($lei, \@inputs) or return;
-	$self->{-mail_sync} = $lei->{opt}->{sync} // 1;
+	$self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1;
 
 	$lei->ale; # initialize for workers to read
 	my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
@@ -77,8 +72,7 @@ sub lei_import { # the main "lei import" method
 		# $j = $net->net_concurrency($j); TODO
 		if ($lei->{opt}->{incremental} // 1) {
 			$net->{incremental} = 1;
-			$net->{itrk_fn} = $lei->store_path .
-						'/net_last.sqlite3';
+			$net->{-lms_ro} = $lei->_lei_store->search->lms // 0;
 		}
 	} else {
 		my $nproc = $self->detect_nproc;
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 785e607d..ce675f40 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -110,14 +110,12 @@ sub input_path_url {
 	my $ifmt = lc($lei->{opt}->{'in-format'} // '');
 	# TODO auto-detect?
 	if ($input =~ m!\Aimaps?://!i) {
-		$lei->{net}->imap_each($input, $self->can('input_imap_cb') //
-						$self->can('input_net_cb'),
-					$self, @args);
+		$lei->{net}->imap_each($input, $self->can('input_net_cb'),
+						$self, @args);
 		return;
 	} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
-		$lei->{net}->nntp_each($input, $self->can('input_nntp_cb') //
-						$self->can('input_net_cb'),
-					$self, @args);
+		$lei->{net}->nntp_each($input, $self->can('input_net_cb'),
+						$self, @args);
 		return;
 	} elsif ($input =~ m!\Ahttps?://!i) {
 		handle_http_input($self, $input, @args);
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 52f26d69..2ce189fa 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -143,7 +143,7 @@ sub each_src {
 }
 
 sub location_stats {
-	my ($self, $folder, $cb, @args) = @_;
+	my ($self, $folder) = @_;
 	my $dbh = $self->{dbh} //= dbh_new($self);
 	my $fid;
 	my $ret = {};
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 5978752f..81d25ead 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -235,7 +235,7 @@ sub imap_common_init ($;$) {
 	$self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
 	eval { require PublicInbox::IMAPClient } or
 		die "Mail::IMAPClient is required for IMAP:\n$@\n";
-	eval { require PublicInbox::IMAPTracker } or
+	($lei || eval { require PublicInbox::IMAPTracker }) or
 		die "DBD::SQLite is required for IMAP\n:$@\n";
 	require PublicInbox::URIimap;
 	my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
@@ -283,7 +283,7 @@ sub nntp_common_init ($;$) {
 	$self->{quiet} = 1 if $lei && $lei->{opt}->{quiet};
 	eval { require Net::NNTP } or
 		die "Net::NNTP is required for NNTP:\n$@\n";
-	eval { require PublicInbox::IMAPTracker } or
+	($lei || eval { require PublicInbox::IMAPTracker }) or
 		die "DBD::SQLite is required for NNTP\n:$@\n";
 	my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
 	my $nn_args = {}; # scheme://authority => Net::NNTP->new arg
@@ -373,17 +373,28 @@ sub run_commit_cb ($) {
 	$cb->(@args);
 }
 
-sub _itrk ($$) {
-	my ($self, $uri) = @_;
-	return unless $self->{incremental};
-	# itrk_fn is set by lei
-	PublicInbox::IMAPTracker->new($$uri, $self->{itrk_fn});
+sub _itrk_last ($$;$) {
+	my ($self, $uri, $r_uidval) = @_;
+	return (undef, undef, $r_uidval) unless $self->{incremental};
+	my ($itrk, $l_uid, $l_uidval);
+	if (defined(my $lms = $self->{-lms_ro})) { # LeiMailSync or 0
+		$uri->uidvalidity($r_uidval) if defined $r_uidval;
+		my $x;
+		$l_uid = ($lms && ($x = $lms->location_stats($$uri))) ?
+				$x->{'uid.max'} : undef;
+		# itrk remains undef, lei/store worker writes to
+		# mail_sync.sqlite3
+	} else {
+		$itrk = PublicInbox::IMAPTracker->new($$uri);
+		($l_uidval, $l_uid) = $itrk->get_last($$uri);
+	}
+	($itrk, $l_uid, $l_uidval //= $r_uidval);
 }
 
 sub _imap_fetch_all ($$$) {
-	my ($self, $mic, $uri) = @_;
-	my $sec = uri_section($uri);
-	my $mbx = $uri->mailbox;
+	my ($self, $mic, $orig_uri) = @_;
+	my $sec = uri_section($orig_uri);
+	my $mbx = $orig_uri->mailbox;
 	$mic->Clear(1); # trim results history
 	$mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!";
 	my ($r_uidval, $r_uidnext);
@@ -393,20 +404,22 @@ sub _imap_fetch_all ($$$) {
 		last if $r_uidval && $r_uidnext;
 	}
 	$r_uidval //= $mic->uidvalidity($mbx) //
-		return "E: $uri cannot get UIDVALIDITY";
+		return "E: $orig_uri cannot get UIDVALIDITY";
 	$r_uidnext //= $mic->uidnext($mbx) //
-		return "E: $uri cannot get UIDNEXT";
-	my $url = ref($uri)->new($$uri);
-	$url->uidvalidity($r_uidval);
-	$url = $$url;
-	my $itrk = _itrk($self, $uri);
-	my $l_uid;
-	$l_uid = $itrk->get_last($r_uidval) if $itrk;
+		return "E: $orig_uri cannot get UIDNEXT";
+	my $uri = $orig_uri->clone;
+	my ($itrk, $l_uid, $l_uidval) = _itrk_last($self, $uri, $r_uidval);
+	return <<EOF if $l_uidval != $r_uidval;
+E: $uri UIDVALIDITY mismatch
+E: local=$l_uidval != remote=$r_uidval
+EOF
+	$uri->uidvalidity($r_uidval);
 	$l_uid //= 0;
 	my $r_uid = $r_uidnext - 1;
-	if ($l_uid > $r_uid) {
-		return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n";
-	}
+	return <<EOF if $l_uid > $r_uid;
+E: $uri local UID exceeds remote ($l_uid > $r_uid)
+E: $uri strangely, UIDVALIDLITY matches ($l_uidval)
+EOF
 	return if $l_uid >= $r_uid; # nothing to do
 	$l_uid ||= 1;
 	my ($mod, $shard) = @{$self->{shard_info} // []};
@@ -458,7 +471,7 @@ sub _imap_fetch_all ($$$) {
 				# messages get deleted, so holes appear
 				my $per_uid = delete $r->{$uid} // next;
 				my $raw = delete($per_uid->{$key}) // next;
-				_imap_do_msg($self, $url, $uid, \$raw,
+				_imap_do_msg($self, $$uri, $uid, \$raw,
 						$per_uid->{FLAGS});
 				$last_uid = $uid;
 				last if $self->{quit};
@@ -547,8 +560,7 @@ sub _nntp_fetch_all ($$$) {
 	# IMAPTracker is also used for tracking NNTP, UID == article number
 	# LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
 	# expensive.  So we assume newsgroups don't change:
-	my $itrk = _itrk($self, $uri);
-	my (undef, $l_art) = $itrk ? $itrk->get_last : ();
+	my ($itrk, $l_art) = _itrk_last($self, $uri);
 
 	# allow users to specify articles to refetch
 	# cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt
diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm
index df9f5fd9..f6244137 100644
--- a/lib/PublicInbox/URIimap.pm
+++ b/lib/PublicInbox/URIimap.pm
@@ -144,4 +144,6 @@ sub scheme {
 
 sub as_string { ${$_[0]} }
 
+sub clone { ref($_[0])->new(as_string($_[0])) }
+
 1;
diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t
index cf1fa49d..611328b4 100644
--- a/t/lei-import-imap.t
+++ b/t/lei-import-imap.t
@@ -45,8 +45,8 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes');
 	lei_ok([qw(tag +kw:seen), $url], undef, undef);
 
-	my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3";
-	ok(-s $f, 'net tracked for redundant imports');
+	my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3";
+	ok(-s $f, 'mail_sync tracked for redundant imports');
 	lei_ok('inspect', "blob:$out->[5]->{blob}");
 	my $x = json_utf8->decode($lei_out);
 	is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect');
diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t
index d795a86a..12bb002a 100644
--- a/t/lei-import-nntp.t
+++ b/t/lei-import-nntp.t
@@ -27,7 +27,7 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	for (@$out) { $r{ref($_)}++ }
 	is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes');
 
-	my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3";
-	ok(-s $f, 'net tracked for redundant imports');
+	my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3";
+	ok(-s $f, 'mail_sync exists tracked for redundant imports');
 });
 done_testing;

^ permalink raw reply related	[relevance 3%]

* Re: [PATCH] net_reader: restart on first UID when UIDVALIDITY changes
  2021-04-23  7:28  6% [PATCH] net_reader: restart on first UID when UIDVALIDITY changes Eric Wong
@ 2021-04-23 11:26  7% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2021-04-23 11:26 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> In other words, treat the same IMAP folder with a different
> UIDVALIDITY a completely different folder.  If the UIDVALIDITY

"as a completely different folder"

Pushed as commit 7632d8f7590daf70c65d4270e750c36552fa9389

^ permalink raw reply	[relevance 7%]

* [PATCH] net_reader: restart on first UID when UIDVALIDITY changes
@ 2021-04-23  7:28  6% Eric Wong
  2021-04-23 11:26  7% ` Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2021-04-23  7:28 UTC (permalink / raw)
  To: meta

In other words, treat the same IMAP folder with a different
UIDVALIDITY a completely different folder.  If the UIDVALIDITY
changes, we can start from UID=1 without falling behind or
losing data.  If the UIDVALIDITY gets reset to a previously
known-good message, we can still resume where we left off
before the first UIDVALIDITY change.

This affects public-inbox-watch and "lei import"

One potential downside of this is for rare altid users, but
that's mainly intended for NNTP article numbers which are/were
often publicized; not IMAP UIDs which are rarely publicized.

The other potential downside is bandwidth waste in in the rare
case UIDVALIDITY changes while IMAP folder contents remain
unchanged.  There's no extra storage used due to existing
(v1|v2|lei/store) deduplication mechanisms.

Before this change, we were matching offlineimap behavior and
stopped synching an IMAP folder when its UIDVALIDITY changed.
offlineimap behavior made sense for IMAP <=> Maildir
synchronization since Maildirs had no sense of UIDVALIDITY and
could only rely on name mapping.
---
 lib/PublicInbox/IMAPTracker.pm | 16 ++++++++++++----
 lib/PublicInbox/NetReader.pm   | 11 +++--------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/IMAPTracker.pm b/lib/PublicInbox/IMAPTracker.pm
index bcf7af2e..fe813582 100644
--- a/lib/PublicInbox/IMAPTracker.pm
+++ b/lib/PublicInbox/IMAPTracker.pm
@@ -39,12 +39,20 @@ sub dbh_new ($) {
 	$dbh;
 }
 
-sub get_last ($) {
-	my ($self) = @_;
-	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+sub get_last ($;$) {
+	my ($self, $validity) = @_;
+	my $sth;
+	if (defined $validity) {
+		$sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT uid_validity, uid FROM imap_last WHERE url = ? AND uid_validity = ?
+
+		$sth->execute($self->{url}, $validity);
+	} else {
+		$sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
 SELECT uid_validity, uid FROM imap_last WHERE url = ?
 
-	$sth->execute($self->{url});
+		$sth->execute($self->{url});
+	}
 	$sth->fetchrow_array;
 }
 
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index c7b43f01..39129b34 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -397,17 +397,12 @@ sub _imap_fetch_all ($$$) {
 	$r_uidnext //= $mic->uidnext($mbx) //
 		return "E: $uri cannot get UIDNEXT";
 	my $itrk = _itrk($self, $uri);
-	my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : ();
-	$l_uidval //= $r_uidval; # first time
+	my $l_uid;
+	$l_uid = $itrk->get_last($r_uidval) if $itrk;
 	$l_uid //= 0;
-	if ($l_uidval != $r_uidval) {
-		return "E: $uri UIDVALIDITY mismatch\n".
-			"E: local=$l_uidval != remote=$r_uidval";
-	}
 	my $r_uid = $r_uidnext - 1;
 	if ($l_uid > $r_uid) {
-		return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n".
-			"E: $uri strangely, UIDVALIDLITY matches ($l_uidval)\n";
+		return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n";
 	}
 	return if $l_uid >= $r_uid; # nothing to do
 	$l_uid ||= 1;

^ permalink raw reply related	[relevance 6%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-04-23  7:28  6% [PATCH] net_reader: restart on first UID when UIDVALIDITY changes Eric Wong
2021-04-23 11:26  7% ` Eric Wong
2021-04-29  9:46     [PATCH 0/4] some lei synchronization work Eric Wong
2021-04-29  9:46  3% ` [PATCH 2/4] lei import: avoid IMAPTracker, use LeiMailSync more Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).