about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-04-23 07:22:01 -0400
committer: Eric Wong <e@80x24.org> 2021-04-23 20:55:38 +0000
commit: aaa4b3a3fd2d7805afd412b3de2fca07a5e8898e (patch)
tree: 79264102d4305a395df554af0fb31ca702fdbc30
parent: 0831cd4c510deb97e755aa09a808bcf5f9e9deeb (diff)
download: public-inbox-aaa4b3a3fd2d7805afd412b3de2fca07a5e8898e.tar.gz
This saves some work and makes it easier to set volatile
metadata on a message at import time.
-rw-r--r-- lib/PublicInbox/LeiImport.pm 7
-rw-r--r-- lib/PublicInbox/LeiInput.pm 54
-rw-r--r-- lib/PublicInbox/LeiTag.pm 56
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 41
-rw-r--r-- t/lei-import.t 10
5 files changed, 98 insertions, 70 deletions
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index accf08f5..e3c756e8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,6 +12,10 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
         my ($self, $eml, $vmd) = @_;
         my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+        if (my $all_vmd = $self->{all_vmd}) {
+                $vmd //= {};
+                @$vmd{keys %$all_vmd} = values %$all_vmd;
+        }
         $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
 }
 
@@ -53,6 +57,9 @@ sub lei_import { # the main "lei import" method
         $sto->write_prepare($lei);
         my $self = bless {}, __PACKAGE__;
         $self->{-import_kw} = $lei->{opt}->{kw} // 1;
+        my $vmd_mod = $self->vmd_mod_extract(\@inputs);
+        return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
+        $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
         $self->prepare_inputs($lei, \@inputs) or return;
         $lei->ale; # initialize for workers to read
         my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index e416d3ed..de60a076 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -7,6 +7,38 @@ use strict;
 use v5.10.1;
 use PublicInbox::DS;
 
+# JMAP RFC 8621 4.1.1
+# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
+our @KW = (qw(seen answered flagged draft), # widely-compatible
+        qw(forwarded), # IMAP + Maildir
+        qw(phishing junk notjunk)); # rarely supported
+
+# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
+# but is it good idea?  Stick to the system and reserved ones, for now.
+# The widely-compatible ones map to IMAP system flags, Maildir flags
+# and mbox Status/X-Status headers.
+my %KW = map { $_ => 1 } @KW;
+my $L_MAX = 244; # Xapian term limit - length('L')
+
+# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
+# "Servers MAY reject names that violate server policy"
+my %ERR = (
+        L => sub {
+                my ($label) = @_;
+                length($label) >= $L_MAX and
+                        return "`$label' too long (must be <= $L_MAX)";
+                $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
+                        undef : "`$label' is invalid";
+        },
+        kw => sub {
+                my ($kw) = @_;
+                $KW{$kw} ? undef : <<EOM;
+`$kw' is not one of: `seen', `flagged', `answered', `draft'
+`junk', `notjunk', `phishing' or `forwarded'
+EOM
+        }
+);
+
 sub check_input_format ($;$) {
         my ($lei, $files) = @_;
         my $opt_key = 'in-format';
@@ -183,4 +215,26 @@ sub input_only_atfork_child {
         undef;
 }
 
+# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
+# for update_xvmd -> update_vmd
+sub vmd_mod_extract {
+        my $argv = $_[-1];
+        my $vmd_mod = {};
+        my @new_argv;
+        for my $x (@$argv) {
+                if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
+                        my ($op, $pfx, $val) = ($1, $2, $3);
+                        if (my $err = $ERR{$pfx}->($val)) {
+                                push @{$vmd_mod->{err}}, $err;
+                        } else { # set "+kw", "+L", "-L", "-kw"
+                                push @{$vmd_mod->{$op.$pfx}}, $val;
+                        }
+                } else {
+                        push @new_argv, $x;
+                }
+        }
+        @$argv = @new_argv;
+        $vmd_mod;
+}
+
 1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index f019202f..f5791947 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -7,58 +7,6 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 
-# JMAP RFC 8621 4.1.1
-# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
-my @KW = (qw(seen answered flagged draft), # widely-compatible
-        qw(forwarded phishing junk notjunk)); # rarely supported
-# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
-# but is it good idea?  Stick to the system and reserved ones, for now.
-# The widely-compatible ones map to IMAP system flags, Maildir flags
-# and mbox Status/X-Status headers.
-my %KW = map { $_ => 1 } @KW;
-my $L_MAX = 244; # Xapian term limit - length('L')
-
-# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
-# "Servers MAY reject names that violate server policy"
-my %ERR = (
-        L => sub {
-                my ($label) = @_;
-                length($label) >= $L_MAX and
-                        return "`$label' too long (must be <= $L_MAX)";
-                $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
-                        undef : "`$label' is invalid";
-        },
-        kw => sub {
-                my ($kw) = @_;
-                $KW{$kw} ? undef : <<EOM;
-`$kw' is not one of: `seen', `flagged', `answered', `draft'
-`junk', `notjunk', `phishing' or `forwarded'
-EOM
-        }
-);
-
-# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
-# for update_xvmd -> update_vmd
-sub vmd_mod_extract {
-        my $argv = $_[-1];
-        my $vmd_mod = {};
-        my @new_argv;
-        for my $x (@$argv) {
-                if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
-                        my ($op, $pfx, $val) = ($1, $2, $3);
-                        if (my $err = $ERR{$pfx}->($val)) {
-                                push @{$vmd_mod->{err}}, $err;
-                        } else { # set "+kw", "+L", "-L", "-kw"
-                                push @{$vmd_mod->{$op.$pfx}}, $val;
-                        }
-                } else {
-                        push @new_argv, $x;
-                }
-        }
-        @$argv = @new_argv;
-        $vmd_mod;
-}
-
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
         my ($self, $eml) = @_;
         if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) {
@@ -99,7 +47,7 @@ sub lei_tag { # the "lei tag" method
         $sto->write_prepare($lei);
         my $self = bless { missing => 0 }, __PACKAGE__;
         $lei->ale; # refresh and prepare
-        my $vmd_mod = vmd_mod_extract(\@argv);
+        my $vmd_mod = $self->vmd_mod_extract(\@argv);
         return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
         $self->prepare_inputs($lei, \@argv) or return;
         grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or
@@ -161,7 +109,7 @@ sub _complete_mark_common ($) {
 sub _complete_tag {
         my ($self, @argv) = @_;
         my @L = eval { $self->_lei_store->search->all_terms('L') };
-        my @all = ((map { ("+kw:$_", "-kw:$_") } @KW),
+        my @all = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW),
                 (map { ("+L:$_", "-L:$_") } @L));
         return @all if !@argv;
         my ($cur, $re) = _complete_mark_common(\@argv);
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ca1f3588..f066cc92 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -567,16 +567,39 @@ sub set_vmd {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
+sub apply_vmd_mod ($$) {
+        my ($doc, $vmd_mod) = @_;
+        my $updated = 0;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                # field: "label" or "kw"
+                for my $val (@{$vmd_mod->{"-$field"} // []}) {
+                        eval {
+                                $doc->remove_term($pfx . $val);
+                                ++$updated;
+                        };
+                }
+                for my $val (@{$vmd_mod->{"+$field"} // []}) {
+                        $doc->add_boolean_term($pfx . $val);
+                        ++$updated;
+                }
+        }
+        $updated;
+}
+
 sub add_vmd {
         my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
         my @x = @VMD_MAP;
+        my $updated = 0;
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
                 my $add = $vmd->{$field} // next;
                 $doc->add_boolean_term($pfx . $_) for @$add;
+                $updated += scalar(@$add);
         }
-        $self->{xdb}->replace_document($docid, $doc);
+        $updated += apply_vmd_mod($doc, $vmd);
+        $self->{xdb}->replace_document($docid, $doc) if $updated;
 }
 
 sub remove_vmd {
@@ -601,21 +624,7 @@ sub update_vmd {
         my ($self, $docid, $vmd_mod) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my $updated = 0;
-        my @x = @VMD_MAP;
-        while (my ($field, $pfx) = splice(@x, 0, 2)) {
-                # field: "label" or "kw"
-                for my $val (@{$vmd_mod->{"-$field"} // []}) {
-                        eval {
-                                $doc->remove_term($pfx . $val);
-                                ++$updated;
-                        };
-                }
-                for my $val (@{$vmd_mod->{"+$field"} // []}) {
-                        $doc->add_boolean_term($pfx . $val);
-                        ++$updated;
-                }
-        }
+        my $updated = apply_vmd_mod($doc, $vmd_mod);
         $self->{xdb}->replace_document($docid, $doc) if $updated;
         $updated;
 }
diff --git a/t/lei-import.t b/t/lei-import.t
index 8635df5a..6e9a853c 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -101,6 +101,16 @@ is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or
 lei_ok('blob', '--mail', $draft_b->[0]->{blob});
 is($lei_out, $eml_str, 'draft retrieved by blob');
 
+
+$eml_str = "Message-ID: <inbox\@example.com>\nSubject: label-this\n\n";
+lei_ok([qw(import -F eml - +kw:seen +L:inbox)],
+        undef, { %$lei_opt, 0 => \$eml_str });
+lei_ok(qw(q m:inbox@example.com));
+$res = json_utf8->decode($lei_out);
+is_deeply($res->[0]->{kw}, ['seen'], 'keyword set');
+is_deeply($res->[0]->{L}, ['inbox'], 'label set');
+
+
 # see t/lei_to_mail.t for "import -F mbox*"
 });
 done_testing;