From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei import: support adding keywords and labels on import
Date: Fri, 23 Apr 2021 07:22:01 -0400 [thread overview]
Message-ID: <20210423112201.99369-1-e@80x24.org> (raw)
This saves some work and makes it easier to set volatile
metadata (keywords and labels) on a message at import time,
using the same +kw:FOO and +L:FOO arguments "lei tag" accepts.
---
lib/PublicInbox/LeiImport.pm | 7 +++++
lib/PublicInbox/LeiInput.pm | 54 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/LeiTag.pm | 56 ++----------------------------------
lib/PublicInbox/SearchIdx.pm | 41 +++++++++++++++-----------
t/lei-import.t | 10 +++++++
5 files changed, 98 insertions(+), 70 deletions(-)
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index accf08f5..e3c756e8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,6 +12,10 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
my ($self, $eml, $vmd) = @_;
my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+ if (my $all_vmd = $self->{all_vmd}) {
+ $vmd //= {};
+ @$vmd{keys %$all_vmd} = values %$all_vmd;
+ }
$self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
}
@@ -53,6 +57,9 @@ sub lei_import { # the main "lei import" method
$sto->write_prepare($lei);
my $self = bless {}, __PACKAGE__;
$self->{-import_kw} = $lei->{opt}->{kw} // 1;
+ my $vmd_mod = $self->vmd_mod_extract(\@inputs);
+ return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
+ $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
$self->prepare_inputs($lei, \@inputs) or return;
$lei->ale; # initialize for workers to read
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index e416d3ed..de60a076 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -7,6 +7,38 @@ use strict;
use v5.10.1;
use PublicInbox::DS;
+# JMAP RFC 8621 4.1.1
+# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
+our @KW = (qw(seen answered flagged draft), # widely-compatible
+ qw(forwarded), # IMAP + Maildir
+ qw(phishing junk notjunk)); # rarely supported
+
+# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
+# but is it a good idea? Stick to the system and reserved ones, for now.
+# The widely-compatible ones map to IMAP system flags, Maildir flags
+# and mbox Status/X-Status headers.
+my %KW = map { $_ => 1 } @KW;
+my $L_MAX = 244; # Xapian term limit - length('L')
+
+# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
+# "Servers MAY reject names that violate server policy"
+my %ERR = (
+ L => sub {
+ my ($label) = @_;
+ length($label) >= $L_MAX and
+ return "`$label' too long (must be <= $L_MAX)";
+ $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
+ undef : "`$label' is invalid";
+ },
+ kw => sub {
+ my ($kw) = @_;
+ $KW{$kw} ? undef : <<EOM;
+`$kw' is not one of: `seen', `flagged', `answered', `draft'
+`junk', `notjunk', `phishing' or `forwarded'
+EOM
+ }
+);
+
sub check_input_format ($;$) {
my ($lei, $files) = @_;
my $opt_key = 'in-format';
@@ -183,4 +215,26 @@ sub input_only_atfork_child {
undef;
}
+# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
+# for update_xvmd -> update_vmd
+sub vmd_mod_extract {
+ my $argv = $_[-1];
+ my $vmd_mod = {};
+ my @new_argv;
+ for my $x (@$argv) {
+ if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
+ my ($op, $pfx, $val) = ($1, $2, $3);
+ if (my $err = $ERR{$pfx}->($val)) {
+ push @{$vmd_mod->{err}}, $err;
+ } else { # set "+kw", "+L", "-L", "-kw"
+ push @{$vmd_mod->{$op.$pfx}}, $val;
+ }
+ } else {
+ push @new_argv, $x;
+ }
+ }
+ @$argv = @new_argv;
+ $vmd_mod;
+}
+
1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index f019202f..f5791947 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -7,58 +7,6 @@ use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
-# JMAP RFC 8621 4.1.1
-# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
-my @KW = (qw(seen answered flagged draft), # widely-compatible
- qw(forwarded phishing junk notjunk)); # rarely supported
-# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
-# but is it good idea? Stick to the system and reserved ones, for now.
-# The widely-compatible ones map to IMAP system flags, Maildir flags
-# and mbox Status/X-Status headers.
-my %KW = map { $_ => 1 } @KW;
-my $L_MAX = 244; # Xapian term limit - length('L')
-
-# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
-# "Servers MAY reject names that violate server policy"
-my %ERR = (
- L => sub {
- my ($label) = @_;
- length($label) >= $L_MAX and
- return "`$label' too long (must be <= $L_MAX)";
- $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
- undef : "`$label' is invalid";
- },
- kw => sub {
- my ($kw) = @_;
- $KW{$kw} ? undef : <<EOM;
-`$kw' is not one of: `seen', `flagged', `answered', `draft'
-`junk', `notjunk', `phishing' or `forwarded'
-EOM
- }
-);
-
-# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
-# for update_xvmd -> update_vmd
-sub vmd_mod_extract {
- my $argv = $_[-1];
- my $vmd_mod = {};
- my @new_argv;
- for my $x (@$argv) {
- if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
- my ($op, $pfx, $val) = ($1, $2, $3);
- if (my $err = $ERR{$pfx}->($val)) {
- push @{$vmd_mod->{err}}, $err;
- } else { # set "+kw", "+L", "-L", "-kw"
- push @{$vmd_mod->{$op.$pfx}}, $val;
- }
- } else {
- push @new_argv, $x;
- }
- }
- @$argv = @new_argv;
- $vmd_mod;
-}
-
sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
my ($self, $eml) = @_;
if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) {
@@ -99,7 +47,7 @@ sub lei_tag { # the "lei tag" method
$sto->write_prepare($lei);
my $self = bless { missing => 0 }, __PACKAGE__;
$lei->ale; # refresh and prepare
- my $vmd_mod = vmd_mod_extract(\@argv);
+ my $vmd_mod = $self->vmd_mod_extract(\@argv);
return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
$self->prepare_inputs($lei, \@argv) or return;
grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or
@@ -161,7 +109,7 @@ sub _complete_mark_common ($) {
sub _complete_tag {
my ($self, @argv) = @_;
my @L = eval { $self->_lei_store->search->all_terms('L') };
- my @all = ((map { ("+kw:$_", "-kw:$_") } @KW),
+ my @all = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW),
(map { ("+L:$_", "-L:$_") } @L));
return @all if !@argv;
my ($cur, $re) = _complete_mark_common(\@argv);
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ca1f3588..f066cc92 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -567,16 +567,39 @@ sub set_vmd {
$self->{xdb}->replace_document($docid, $doc);
}
+sub apply_vmd_mod ($$) {
+ my ($doc, $vmd_mod) = @_;
+ my $updated = 0;
+ my @x = @VMD_MAP;
+ while (my ($field, $pfx) = splice(@x, 0, 2)) {
+ # field: "label" or "kw"
+ for my $val (@{$vmd_mod->{"-$field"} // []}) {
+ eval {
+ $doc->remove_term($pfx . $val);
+ ++$updated;
+ };
+ }
+ for my $val (@{$vmd_mod->{"+$field"} // []}) {
+ $doc->add_boolean_term($pfx . $val);
+ ++$updated;
+ }
+ }
+ $updated;
+}
+
sub add_vmd {
my ($self, $docid, $vmd) = @_;
begin_txn_lazy($self);
my $doc = _get_doc($self, $docid) or return;
my @x = @VMD_MAP;
+ my $updated = 0;
while (my ($field, $pfx) = splice(@x, 0, 2)) {
my $add = $vmd->{$field} // next;
$doc->add_boolean_term($pfx . $_) for @$add;
+ $updated += scalar(@$add);
}
- $self->{xdb}->replace_document($docid, $doc);
+ $updated += apply_vmd_mod($doc, $vmd);
+ $self->{xdb}->replace_document($docid, $doc) if $updated;
}
sub remove_vmd {
@@ -601,21 +624,7 @@ sub update_vmd {
my ($self, $docid, $vmd_mod) = @_;
begin_txn_lazy($self);
my $doc = _get_doc($self, $docid) or return;
- my $updated = 0;
- my @x = @VMD_MAP;
- while (my ($field, $pfx) = splice(@x, 0, 2)) {
- # field: "label" or "kw"
- for my $val (@{$vmd_mod->{"-$field"} // []}) {
- eval {
- $doc->remove_term($pfx . $val);
- ++$updated;
- };
- }
- for my $val (@{$vmd_mod->{"+$field"} // []}) {
- $doc->add_boolean_term($pfx . $val);
- ++$updated;
- }
- }
+ my $updated = apply_vmd_mod($doc, $vmd_mod);
$self->{xdb}->replace_document($docid, $doc) if $updated;
$updated;
}
diff --git a/t/lei-import.t b/t/lei-import.t
index 8635df5a..6e9a853c 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -101,6 +101,16 @@ is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or
lei_ok('blob', '--mail', $draft_b->[0]->{blob});
is($lei_out, $eml_str, 'draft retrieved by blob');
+
+$eml_str = "Message-ID: <inbox\@example.com>\nSubject: label-this\n\n";
+lei_ok([qw(import -F eml - +kw:seen +L:inbox)],
+ undef, { %$lei_opt, 0 => \$eml_str });
+lei_ok(qw(q m:inbox@example.com));
+$res = json_utf8->decode($lei_out);
+is_deeply($res->[0]->{kw}, ['seen'], 'keyword set');
+is_deeply($res->[0]->{L}, ['inbox'], 'label set');
+
+
# see t/lei_to_mail.t for "import -F mbox*"
});
done_testing;
reply other threads:[~2021-04-23 11:22 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210423112201.99369-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).