* [PATCH 00/12] lei: fix various annoyances
@ 2021-09-21 7:41 7% Eric Wong
2021-09-21 7:41 6% ` [PATCH 03/12] lei_mail_sync: account for non-unique cases Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-09-21 7:41 UTC (permalink / raw)
To: meta
Eric Wong (12):
lei inspect: convert to WQ worker
lei inspect: support NNTP URLs
lei_mail_sync: account for non-unique cases
lei: simplify internal arg2folder usage
lei lcat: use single queue for ordering
doc: lei-security: section for WIP auth methods
lei lcat: support NNTP URLs
lei: various completion improvements
lei q: show progress on >1s preparation phase
search: drop reopen retry message
lei q: update messages to reflect --save default
lei q: improve --limit behavior and progress
Documentation/lei-q.pod | 6 +-
Documentation/lei-security.pod | 8 ++
lib/PublicInbox/LEI.pm | 2 +-
lib/PublicInbox/LeiExportKw.pm | 12 +--
lib/PublicInbox/LeiExternal.pm | 5 ++
lib/PublicInbox/LeiForgetMailSync.pm | 9 +--
lib/PublicInbox/LeiImport.pm | 20 +++--
lib/PublicInbox/LeiImportKw.pm | 12 ++-
lib/PublicInbox/LeiInspect.pm | 103 +++++++++++++++++---------
lib/PublicInbox/LeiLcat.pm | 91 +++++++++++++----------
lib/PublicInbox/LeiLsMailSource.pm | 11 +--
lib/PublicInbox/LeiMailSync.pm | 102 +++++++++++++++++--------
lib/PublicInbox/LeiNoteEvent.pm | 5 +-
lib/PublicInbox/LeiQuery.pm | 9 ++-
lib/PublicInbox/LeiRefreshMailSync.pm | 18 +++--
lib/PublicInbox/LeiSavedSearch.pm | 22 +++---
lib/PublicInbox/LeiTag.pm | 14 ++--
lib/PublicInbox/LeiToMail.pm | 23 +++++-
lib/PublicInbox/LeiUp.pm | 25 +++----
lib/PublicInbox/LeiXSearch.pm | 36 ++++++---
lib/PublicInbox/Search.pm | 1 -
lib/PublicInbox/SharedKV.pm | 19 +++--
lib/PublicInbox/TestCommon.pm | 11 ++-
t/lei-import-nntp.t | 44 +++++++++++
t/lei-q-save.t | 6 +-
t/lei-watch.t | 2 +-
26 files changed, 414 insertions(+), 202 deletions(-)
^ permalink raw reply [relevance 7%]
* [PATCH 03/12] lei_mail_sync: account for non-unique cases
2021-09-21 7:41 7% [PATCH 00/12] lei: fix various annoyances Eric Wong
@ 2021-09-21 7:41 6% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-09-21 7:41 UTC (permalink / raw)
To: meta
NNTP servers, IMAP servers, and various MUAs may recycle
"unique" identifiers due to software bugs or careless BOFHs.
Warn about them, but always be prepared to account for them.
---
lib/PublicInbox/LeiImport.pm | 8 ++++++--
lib/PublicInbox/LeiImportKw.pm | 12 ++++++++----
lib/PublicInbox/LeiInspect.pm | 14 +++++++-------
lib/PublicInbox/LeiLcat.pm | 7 ++-----
lib/PublicInbox/LeiMailSync.pm | 20 ++++++++++----------
lib/PublicInbox/LeiRefreshMailSync.pm | 2 +-
6 files changed, 34 insertions(+), 29 deletions(-)
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 40530914..3c30db8d 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -37,8 +37,12 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
substr($folder, 0, 0) = 'maildir:'; # add prefix
my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
my $lms = $self->{-lms_ro} //= $self->{lei}->lms; # may be 0 or undef
- my $oidbin = $lms ? $lms->name_oidbin($folder, $bn) : undef;
- my @docids = defined($oidbin) ? $lse->over->oidbin_exists($oidbin) : ();
+ my @oidbin = $lms ? $lms->name_oidbin($folder, $bn) : ();
+ @oidbin > 1 and $self->{lei}->err("W: $folder/*/$$bn not unique:\n",
+ map { "\t".unpack('H*', $_)."\n" } @oidbin);
+ my %seen;
+ my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
+ map { $lse->over->oidbin_exists($_) } @oidbin;
my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
if (scalar @docids) {
$lse->kw_changed(undef, $kw, \@docids) or return;
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 379101c2..21c93515 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -34,11 +34,15 @@ sub ipc_atfork_child {
sub ck_update_kw { # via wq_io_do
my ($self, $url, $uid, $kw) = @_;
- my $oidbin = $self->{-lms_ro}->imap_oidbin($url, $uid) // return;
- my @docids = $self->{over}->oidbin_exists($oidbin) or return;
+ my @oidbin = $self->{-lms_ro}->num_oidbin($url, $uid);
+ my $uid_url = "$url/;UID=$uid";
+ @oidbin > 1 and $self->{lei}->err("W: $uid_url not unique:\n",
+ map { "\t".unpack('H*', $_)."\n" } @oidbin);
+ my %seen;
+ my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
+ map { $self->{over}->oidbin_exists($_) } @oidbin;
$self->{lse}->kw_changed(undef, $kw, \@docids) or return;
- $self->{verbose} and
- $self->{lei}->qerr('# '.unpack('H*', $oidbin)." => @$kw\n");
+ $self->{verbose} and $self->{lei}->qerr("# $uid_url => @$kw\n");
$self->{sto}->wq_do('set_eml_vmd', undef, { kw => $kw }, \@docids);
}
diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
index ab2c98d9..722ba5b2 100644
--- a/lib/PublicInbox/LeiInspect.pm
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -32,11 +32,9 @@ sub inspect_imap_uid ($$) {
my ($lei, $uid_uri) = @_;
my $ent = {};
my $lms = $lei->lms or return $ent;
- my $oidhex = $lms->imap_oid($lei, $uid_uri);
- if (ref(my $err = $oidhex)) { # arg2folder error
- $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
- }
- $ent->{$$uid_uri} = $oidhex;
+ my @oidhex = $lms->imap_oidhex($lei, $uid_uri);
+ $ent->{$$uid_uri} = @oidhex == 1 ? $oidhex[0] :
+ ((@oidhex == 0) ? undef : \@oidhex);
$ent;
}
@@ -54,8 +52,10 @@ sub inspect_nntp_range {
}
$end //= $beg;
for my $art ($beg..$end) {
- my $oidbin = $lms->imap_oidbin($folders->[0], $art);
- $ent->{$art} = $oidbin ? unpack('H*', $oidbin) : undef;
+ my @oidhex = map { unpack('H*', $_) }
+ $lms->num_oidbin($folders->[0], $art);
+ $ent->{$art} = @oidhex == 1 ? $oidhex[0] :
+ ((@oidhex == 0) ? undef : \@oidhex);
}
$ret;
}
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index 1e54c3bf..8f8e83bc 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -32,11 +32,8 @@ sub lcat_imap_uri ($$) {
my $lms = $lei->lms or return;
# cf. LeiXsearch->lcat_dump
if (defined $uri->uid) {
- my $oidhex = $lms->imap_oid($lei, $uri);
- if (ref(my $err = $oidhex)) { # art2folder error
- $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
- }
- push @{$lei->{lcat_blob}}, $oidhex;
+ my @oidhex = $lms->imap_oidhex($lei, $uri);
+ push @{$lei->{lcat_blob}}, @oidhex;
} elsif (defined(my $fid = $lms->fid_for($$uri))) {
push @{$lei->{lcat_fid}}, $fid;
} else {
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index d9b9e117..3e725d30 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -66,6 +66,7 @@ CREATE TABLE IF NOT EXISTS blob2num (
oidbin VARBINARY NOT NULL,
fid INTEGER NOT NULL, /* folder ID */
uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
+ /* not UNIQUE(fid, uid), since we may have broken servers */
UNIQUE (oidbin, fid, uid)
)
@@ -78,6 +79,7 @@ CREATE TABLE IF NOT EXISTS blob2name (
oidbin VARBINARY NOT NULL,
fid INTEGER NOT NULL, /* folder ID */
name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
+ /* not UNIQUE(fid, name), since we may have broken software */
UNIQUE (oidbin, fid, name)
)
@@ -522,14 +524,14 @@ EOM
}
}
-sub imap_oidbin ($$$) {
- my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY
- my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return;
+sub num_oidbin ($$$) {
+ my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY if IMAP
+ my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return ();
my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
-SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
+SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
EOM
$sth->execute($fid, $uid);
- $sth->fetchrow_array;
+ map { $_->[0] } @{$sth->fetchall_arrayref};
}
sub name_oidbin ($$$) {
@@ -539,10 +541,10 @@ sub name_oidbin ($$$) {
SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
EOM
$sth->execute($fid, $nm);
- $sth->fetchrow_array;
+ map { $_->[0] } @{$sth->fetchall_arrayref};
}
-sub imap_oid {
+sub imap_oidhex {
my ($self, $lei, $uid_uri) = @_;
my $mailbox_uri = $uid_uri->clone;
$mailbox_uri->uid(undef);
@@ -550,12 +552,10 @@ sub imap_oid {
if (my $err = $self->arg2folder($lei, $folders)) {
if ($err->{fail}) {
$lei->qerr("# no sync information for $mailbox_uri");
- return;
}
$lei->qerr(@{$err->{qerr}}) if $err->{qerr};
}
- my $oidbin = imap_oidbin($self, $folders->[0], $uid_uri->uid);
- $oidbin ? unpack('H*', $oidbin) : undef;
+ map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid)
}
1;
diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
index 2f105005..92673492 100644
--- a/lib/PublicInbox/LeiRefreshMailSync.pm
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -36,7 +36,7 @@ sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
die "BUG: $f was not from a Maildir?";
substr($folder, 0, 0) = 'maildir:'; # add prefix
- return if defined($self->{lms}->name_oidbin($folder, $bn));
+ return if scalar($self->{lms}->name_oidbin($folder, $bn));
my $eml = eml_from_path($f) // return;
my $oidbin = $self->{lei}->git_oid($eml)->digest;
$self->{lms}->set_src($oidbin, $folder, \$bn);
^ permalink raw reply related [relevance 6%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-09-21 7:41 7% [PATCH 00/12] lei: fix various annoyances Eric Wong
2021-09-21 7:41 6% ` [PATCH 03/12] lei_mail_sync: account for non-unique cases Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).