* [PATCH 6/7] lei_mail_sync: for bidirectional keyword sync
2021-04-24 9:28 7% [PATCH 0/7] lei sync preparations, "lei inspect" Eric Wong
@ 2021-04-24 9:28 3% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-04-24 9:28 UTC (permalink / raw)
To: meta
We'll be using the new class to efficiently propagate keyword
changes from lei/store back to Maildir or IMAP folders.
---
MANIFEST | 2 +
lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++++++
t/lei_mail_sync.t | 68 +++++++++++
3 files changed, 281 insertions(+)
create mode 100644 lib/PublicInbox/LeiMailSync.pm
create mode 100644 t/lei_mail_sync.t
diff --git a/MANIFEST b/MANIFEST
index e0f9c35b..abaf54b0 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -201,6 +201,7 @@ lib/PublicInbox/LeiInit.pm
lib/PublicInbox/LeiInput.pm
lib/PublicInbox/LeiLsLabel.pm
lib/PublicInbox/LeiLsSearch.pm
+lib/PublicInbox/LeiMailSync.pm
lib/PublicInbox/LeiMirror.pm
lib/PublicInbox/LeiOverview.pm
lib/PublicInbox/LeiP2q.pm
@@ -407,6 +408,7 @@ t/lei-tag.t
t/lei.t
t/lei_dedupe.t
t/lei_external.t
+t/lei_mail_sync.t
t/lei_overview.t
t/lei_saved_search.t
t/lei_store.t
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
new file mode 100644
index 00000000..52f26d69
--- /dev/null
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -0,0 +1,211 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
+package PublicInbox::LeiMailSync;
+use strict;
+use v5.10.1;
+use DBI;
+
+# Connect to the SQLite file named by $self->{filename} and return the
+# DBI handle.  A true $rw requests a writable handle: the file is created
+# (and handed to PublicInbox::Spawn::nodatacow_fd) when it does not exist
+# yet, create_tables() is run, and WAL journaling is switched on for a
+# newly-created file.  Without $rw the handle is opened with ReadOnly set.
+# Dies if the file cannot be created; RaiseError is set on the handle.
+sub dbh_new {
+	my ($self, $rw) = @_;
+	my $path = $self->{filename};
+	my $fresh = $rw && !-f $path; # true only when we create the file
+	if ($fresh) {
+		require PublicInbox::Spawn;
+		open my $fh, '+>>', $path or die "failed to open $path: $!";
+		PublicInbox::Spawn::nodatacow_fd(fileno($fh));
+	}
+	my %attr = (
+		AutoCommit => 1,
+		RaiseError => 1,
+		PrintError => 0,
+		ReadOnly => !$rw,
+		sqlite_use_immediate_transaction => 1,
+	);
+	my $dbh = DBI->connect("dbi:SQLite:dbname=$path", '', '', \%attr);
+	# sqlite_unicode is deliberately left off: all strings are binary
+	create_tables($dbh) if $rw;
+	$dbh->do('PRAGMA journal_mode = WAL') if $fresh;
+	$dbh->do('PRAGMA case_sensitive_like = ON');
+	$dbh;
+}
+
+# Constructor: remembers the SQLite filename $f and starts with an empty
+# folder-name => fid cache (fmap).  No database handle is opened here.
+sub new {
+	my ($cls, $f) = @_;
+	my $self = { filename => $f, fmap => {} };
+	bless $self, $cls;
+}
+
+# Detach the current handle from $self and commit on it; the next
+# lms_begin (or reader method) will open a new handle.
+sub lms_commit {
+	my ($self) = @_;
+	my $dbh = delete $self->{dbh};
+	$dbh->commit;
+}
+
+# Start a transaction, opening a read-write handle first if $self does
+# not already hold one.
+sub lms_begin {
+	my ($self) = @_;
+	$self->{dbh} //= dbh_new($self, 1);
+	$self->{dbh}->begin_work;
+}
+
+# Ensure the three tables used by this class exist on $dbh.  Every
+# statement is CREATE TABLE IF NOT EXISTS, so calling this on an
+# already-initialized database leaves existing tables alone.
+#   folders   - fid <=> location string, one row per folder
+#   blob2num  - (oidbin, fid, uid) for numerically-identified messages
+#   blob2name - (oidbin, fid, name) for name-identified messages
+# Each heredoc below is terminated by the following empty line.
+sub create_tables {
+ my ($dbh) = @_;
+
+ $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS folders (
+ fid INTEGER PRIMARY KEY,
+ loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */
+ UNIQUE (loc)
+)
+
+ $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS blob2num (
+ oidbin VARBINARY NOT NULL,
+ fid INTEGER NOT NULL, /* folder ID */
+ uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
+ UNIQUE (oidbin, fid, uid)
+)
+
+ $dbh->do(<<'');
+CREATE TABLE IF NOT EXISTS blob2name (
+ oidbin VARBINARY NOT NULL,
+ fid INTEGER NOT NULL, /* folder ID */
+ name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
+ UNIQUE (oidbin, fid, name)
+)
+
+}
+
+# Map a folder location string to its fid using $self->{dbh}, which the
+# caller must already have set.  Returns the existing fid when $folder
+# is known.  For an unknown $folder, returns nothing unless $rw is true;
+# with $rw, allocates MAX(fid)+1, deletes any blob2name/blob2num rows
+# still carrying that fid, inserts the folder and returns the new fid.
+sub _fid_for {
+	my ($self, $folder, $rw) = @_;
+	my $dbh = $self->{dbh};
+	my ($fid) = $dbh->selectrow_array(<<'', undef, $folder);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+	return $fid if defined $fid;
+	return unless $rw;
+
+	my ($max) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');
+	$fid = ($max // 0) + 1;
+
+	# the number may be a reused one, so clobber stale refs to it first
+	for my $tbl (qw(blob2name blob2num)) {
+		$dbh->do("DELETE FROM $tbl WHERE fid = ?", undef, $fid);
+	}
+	$dbh->do('INSERT INTO folders (fid, loc) VALUES (?, ?)', undef,
+		$fid, $folder);
+
+	$fid;
+}
+
+# Record that the blob $oidhex (hex object ID) is present in $folder.
+# $id is either a plain number (IMAP UID, MH number), stored in
+# blob2num, or a reference to a name string, stored in blob2name.
+# An unknown $folder is added to the folders table.  The insert is
+# INSERT OR IGNORE; the return value is whatever execute() returns.
+# Expects $self->{dbh} to be set already (e.g. by lms_begin).
+sub set_src {
+	my ($self, $oidhex, $folder, $id) = @_;
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1);
+	my $dbh = $self->{dbh};
+	my $oidbin = pack('H*', $oidhex);
+	if (!ref($id)) { # numeric ID (IMAP UID, MH number)
+		my $ins = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
+
+		return $ins->execute($oidbin, $fid, $id);
+	}
+	# scalar ref: the referenced string is the name
+	my $ins = $dbh->prepare_cached(<<'');
+INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
+
+	$ins->execute($oidbin, $fid, $$id);
+}
+
+# Forget one source entry of $folder.  $id follows the set_src
+# convention: a plain number selects a blob2num row by uid, a reference
+# to a string selects a blob2name row by name.  The fid lookup is done
+# with $rw set, so an unknown $folder gets a folders row as a side
+# effect.  Returns whatever execute() returns for the DELETE.
+# Expects $self->{dbh} to be set already (e.g. by lms_begin).
+sub clear_src {
+	my ($self, $folder, $id) = @_;
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1);
+	my $dbh = $self->{dbh};
+	if (!ref($id)) { # numeric ID
+		my $del = $dbh->prepare_cached(<<'');
+DELETE FROM blob2num WHERE fid = ? AND uid = ?
+
+		return $del->execute($fid, $id);
+	}
+	# scalar ref: the referenced string is the name
+	my $del = $dbh->prepare_cached(<<'');
+DELETE FROM blob2name WHERE fid = ? AND name = ?
+
+	$del->execute($fid, $$id);
+}
+
+# Read-only: invokes $cb once for every source recorded for $folder.
+# $cb receives ($oidbin, $id, @args), where $id is a plain number for
+# blob2num rows and a reference to the name string for blob2name rows.
+# All numeric rows are visited before any named rows.  Returns early,
+# without calling $cb, when $folder is not in the folders table.
+sub each_src {
+	my ($self, $folder, $cb, @args) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return;
+	my @passes = ( # [ query, pass-the-id-by-reference? ]
+		[ 'SELECT oidbin,uid FROM blob2num WHERE fid = ?', 0 ],
+		[ 'SELECT oidbin,name FROM blob2name WHERE fid = ?', 1 ],
+	);
+	for my $pass (@passes) {
+		my ($sql, $by_ref) = @$pass;
+		my $sth = $dbh->prepare($sql);
+		$sth->execute($fid);
+		while (my ($oidbin, $id) = $sth->fetchrow_array) {
+			$cb->($oidbin, $by_ref ? \$id : $id, @args);
+		}
+	}
+}
+
+# Summarize what is recorded for $folder as a hashref which may hold
+# 'name.count' (rows in blob2name) and 'uid.count', 'uid.min', 'uid.max'
+# (from blob2num).  A key is only present when its value is true, and
+# the uid.* queries stop at the first false result.  Returns nothing
+# when $folder is unknown.  $cb and @args are accepted but not used.
+sub location_stats {
+	my ($self, $folder, $cb, @args) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my %stats;
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return;
+	my ($names) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT COUNT(name) FROM blob2name WHERE fid = ?
+
+	$stats{'name.count'} = $names if $names;
+	for my $op (qw(count min max)) {
+		my ($val) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT $op(uid) FROM blob2num WHERE fid = ?
+
+		last unless $val;
+		$stats{"uid.$op"} = $val;
+	}
+	\%stats;
+}
+
+# Returns a { location => [ list-of-ids-or-names ] } mapping for the
+# blob identified by $oidhex (hex object ID), or undef when no known
+# folder references it.  Within each list, numeric IDs from blob2num
+# come first, followed by names from blob2name; both are plain scalars.
+# Entries whose fid has no row in the folders table are reported with
+# warn and left out of the result.
+sub locations_for {
+	my ($self, $oidhex) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $oidbin = pack('H*', $oidhex); # both tables store the binary OID
+	# fid => [ ... ]; UIDs and names are kept apart so the warning
+	# below can label each kind correctly
+	my (%uids, %names);
+	my $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
+	$sth->execute($oidbin);
+	while (my ($fid, $uid) = $sth->fetchrow_array) {
+		push @{$uids{$fid}}, $uid;
+	}
+	$sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
+	$sth->execute($oidbin);
+	while (my ($fid, $name) = $sth->fetchrow_array) {
+		push @{$names{$fid}}, $name;
+	}
+	$sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
+	my $ret = {};
+	my %seen; # a fid may appear in both tables; resolve it only once
+	for my $fid (grep { !$seen{$_}++ } keys(%uids), keys(%names)) {
+		my $nums = $uids{$fid} // [];
+		my $strs = $names{$fid} // [];
+		$sth->execute($fid);
+		my ($loc) = $sth->fetchrow_array;
+		unless (defined $loc) {
+			# numeric IDs get a '#' prefix, names are shown as-is
+			warn "E: fid=$fid for $oidhex unknown:\n",
+				(map { "E: #$_\n" } @$nums),
+				(map { "E: $_\n" } @$strs);
+			next;
+		}
+		$ret->{$loc} = [ @$nums, @$strs ];
+	}
+	scalar(keys %$ret) ? $ret : undef;
+}
+
+# Returns the list of folder locations, for use in completion.  With a
+# defined $pfx only locations starting with that literal prefix are
+# returned: the LIKE wildcards (% and _) and the backslash escape
+# character inside $pfx are escaped before a trailing % is appended.
+sub folders {
+	my ($self, $pfx) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $sql = 'SELECT loc FROM folders';
+	my @bind;
+	if (defined $pfx) {
+		(my $like = $pfx) =~ s/([%_\\])/\\$1/g; # glob chars
+		$sql .= ' WHERE loc LIKE ? ESCAPE ?';
+		@bind = ($like . '%', '\\');
+	}
+	my $rows = $dbh->selectall_arrayref($sql, undef, @bind);
+	map { $_->[0] } @$rows;
+}
+
+1;
diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t
new file mode 100644
index 00000000..864d6e48
--- /dev/null
+++ b/t/lei_mail_sync.t
@@ -0,0 +1,68 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite));
+require_ok 'PublicInbox::LeiMailSync';
+my ($dir, $for_destroy) = tmpdir();
+# $lms is the writer: all of its changes go through lms_begin/lms_commit
+my $lms = PublicInbox::LeiMailSync->new("$dir/t.sqlite3");
+
+# an empty begin/commit cycle makes the writer create the file and its
+# tables, so the read-only object below has something to open
+$lms->lms_begin;
+$lms->lms_commit;
+# $ro never calls lms_begin, so its handle is opened without $rw
+my $ro = PublicInbox::LeiMailSync->new("$dir/t.sqlite3");
+is_deeply([$ro->folders], [], 'no folders, yet');
+
+# numeric ID (IMAP UID): goes into blob2num
+my $imap = 'imaps://bob@[::1]/INBOX;UIDVALIDITY=9';
+$lms->lms_begin;
+is($lms->set_src('deadbeef', $imap, 1), 1, 'set IMAP once');
+# compared with numeric == because DBI's execute reports zero affected
+# rows as the true-but-zero string "0E0"
+ok($lms->set_src('deadbeef', $imap, 1) == 0, 'set IMAP idempotently');
+$lms->lms_commit;
+is_deeply([$ro->folders], [$imap], 'IMAP folder added');
+is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB');
+is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap],
+ 'IMAP folder with partial GLOB');
+
+is_deeply($ro->locations_for('deadbeef'),
+ { $imap => [ 1 ] }, 'locations_for w/ imap');
+
+# name given as a scalar ref (Maildir basename): goes into blob2name
+my $maildir = 'maildir:/home/user/md';
+my $fname = 'foo:2,S';
+$lms->lms_begin;
+ok($lms->set_src('deadbeef', $maildir, \$fname), 'set Maildir once');
+ok($lms->set_src('deadbeef', $maildir, \$fname) == 0, 'set Maildir again');
+$lms->lms_commit;
+# locations_for returns names as plain strings, not as references
+is_deeply($ro->locations_for('deadbeef'),
+ { $imap => [ 1 ], $maildir => [ $fname ] },
+ 'locations_for w/ maildir + imap');
+
+is_deeply([sort($ro->folders)], [$imap, $maildir], 'both folders shown');
+# each_src hands names to the callback as scalar refs, hence \$fname
+my @res;
+$ro->each_src($maildir, sub {
+ my ($oidbin, $id) = @_;
+ push @res, [ unpack('H*', $oidbin), $id ];
+});
+is_deeply(\@res, [ ['deadbeef', \$fname] ], 'each_src works on Maildir');
+
+# ... and numeric IDs as plain scalars
+@res = ();
+$ro->each_src($imap, sub {
+ my ($oidbin, $id) = @_;
+ push @res, [ unpack('H*', $oidbin), $id ];
+});
+is_deeply(\@res, [ ['deadbeef', 1] ], 'each_src works on IMAP');
+
+# location_stats omits counters that are zero, so each folder only
+# shows the keys for the kind of ID it holds
+is_deeply($ro->location_stats($maildir), { 'name.count' => 1 },
+ 'Maildir location stats');
+is_deeply($ro->location_stats($imap),
+ { 'uid.count' => 1, 'uid.max' => 1, 'uid.min' => 1 },
+ 'IMAP location stats');
+# clear_src reports 1 for the row it deletes and (numerically) 0 when
+# the row is already gone
+$lms->lms_begin;
+is($lms->clear_src($imap, 1), 1, 'clear_src on IMAP');
+is($lms->clear_src($maildir, \$fname), 1, 'clear_src on Maildir');
+ok($lms->clear_src($imap, 1) == 0, 'clear_src again on IMAP');
+ok($lms->clear_src($maildir, \$fname) == 0, 'clear_src again on Maildir');
+$lms->lms_commit;
+is_deeply($ro->location_stats($maildir), {}, 'nothing left');
+
+done_testing;
^ permalink raw reply related [relevance 3%]
* [PATCH 0/7] lei sync preparations, "lei inspect"
@ 2021-04-24 9:28 7% Eric Wong
2021-04-24 9:28 3% ` [PATCH 6/7] lei_mail_sync: for bidirectional keyword sync Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-04-24 9:28 UTC (permalink / raw)
To: meta
"lei import" now tracks IMAP and Maildir source information in
preparation for propagating keywords back to IMAP|Maildir.
"lei inspect" is a long-overdue debug/diagnostic thing.
Eric Wong (7):
lei_input: drop outdated comment w.r.t. compression
t/lei_to_mail: split "lei import" test $HOME directory
URIimap: support ->uidvalidity and ->iuid
net_reader: imap_each: add UIDVALIDITY to URL arg
doc: lei_design_notes: add a bit on WAL usage
lei_mail_sync: for bidirectional keyword sync
lei import: keep sync info for Maildir and IMAP folders
Documentation/lei_design_notes.txt | 12 ++
MANIFEST | 3 +
lib/PublicInbox/LEI.pm | 16 ++-
lib/PublicInbox/LeiImport.pm | 22 ++-
lib/PublicInbox/LeiInput.pm | 42 +++++-
lib/PublicInbox/LeiInspect.pm | 96 +++++++++++++
lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++
lib/PublicInbox/LeiSearch.pm | 7 +
lib/PublicInbox/LeiStore.pm | 20 ++-
lib/PublicInbox/NetReader.pm | 11 +-
lib/PublicInbox/TestCommon.pm | 2 +
lib/PublicInbox/URIimap.pm | 38 +++++-
t/lei-import-imap.t | 27 +++-
t/lei-import-maildir.t | 21 +++
t/lei_mail_sync.t | 68 ++++++++++
t/lei_to_mail.t | 8 +-
t/net_reader-imap.t | 4 +-
t/uri_imap.t | 32 ++++-
18 files changed, 612 insertions(+), 28 deletions(-)
create mode 100644 lib/PublicInbox/LeiInspect.pm
create mode 100644 lib/PublicInbox/LeiMailSync.pm
create mode 100644 t/lei_mail_sync.t
^ permalink raw reply [relevance 7%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-04-24 9:28 7% [PATCH 0/7] lei sync preparations, "lei inspect" Eric Wong
2021-04-24 9:28 3% ` [PATCH 6/7] lei_mail_sync: for bidirectional keyword sync Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).