From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E241C1F5AE for ; Fri, 23 Apr 2021 11:22:02 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] lei import: support adding keywords and labels on import Date: Fri, 23 Apr 2021 07:22:01 -0400 Message-Id: <20210423112201.99369-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This saves some work and makes it easier to set volatile metadata on a message at import time. --- lib/PublicInbox/LeiImport.pm | 7 +++++ lib/PublicInbox/LeiInput.pm | 54 ++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiTag.pm | 56 ++---------------------------------- lib/PublicInbox/SearchIdx.pm | 41 +++++++++++++++----------- t/lei-import.t | 10 +++++++ 5 files changed, 98 insertions(+), 70 deletions(-) diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index accf08f5..e3c756e8 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -12,6 +12,10 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput); sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml, $vmd) = @_; my $xoids = $self->{lei}->{ale}->xoids_for($eml); + if (my $all_vmd = $self->{all_vmd}) { + $vmd //= {}; + @$vmd{keys %$all_vmd} = values %$all_vmd; + } $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids); } @@ -53,6 +57,9 @@ sub lei_import { # the main "lei import" method $sto->write_prepare($lei); my $self = bless {}, __PACKAGE__; $self->{-import_kw} = $lei->{opt}->{kw} // 1; + my $vmd_mod = $self->vmd_mod_extract(\@inputs); + return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; + $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod; $self->prepare_inputs($lei, \@inputs) or return; $lei->ale; # initialize for workers to read my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index e416d3ed..de60a076 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -7,6 +7,38 @@ use strict; use v5.10.1; use PublicInbox::DS; +# JMAP RFC 8621 4.1.1 +# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml +our @KW = (qw(seen answered flagged draft), # widely-compatible + qw(forwarded), # IMAP + Maildir + qw(phishing junk notjunk)); # rarely supported + +# note: RFC 8621 states "Users may add arbitrary keywords to an Email", +# but is it good idea? Stick to the system and reserved ones, for now. +# The widely-compatible ones map to IMAP system flags, Maildir flags +# and mbox Status/X-Status headers. +my %KW = map { $_ => 1 } @KW; +my $L_MAX = 244; # Xapian term limit - length('L') + +# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name" +# "Servers MAY reject names that violate server policy" +my %ERR = ( + L => sub { + my ($label) = @_; + length($label) >= $L_MAX and + return "`$label' too long (must be <= $L_MAX)"; + $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ? + undef : "`$label' is invalid"; + }, + kw => sub { + my ($kw) = @_; + $KW{$kw} ? undef : < update_vmd +sub vmd_mod_extract { + my $argv = $_[-1]; + my $vmd_mod = {}; + my @new_argv; + for my $x (@$argv) { + if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) { + my ($op, $pfx, $val) = ($1, $2, $3); + if (my $err = $ERR{$pfx}->($val)) { + push @{$vmd_mod->{err}}, $err; + } else { # set "+kw", "+L", "-L", "-kw" + push @{$vmd_mod->{$op.$pfx}}, $val; + } + } else { + push @new_argv, $x; + } + } + @$argv = @new_argv; + $vmd_mod; +} + 1; diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm index f019202f..f5791947 100644 --- a/lib/PublicInbox/LeiTag.pm +++ b/lib/PublicInbox/LeiTag.pm @@ -7,58 +7,6 @@ use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); -# JMAP RFC 8621 4.1.1 -# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml -my @KW = (qw(seen answered flagged draft), # widely-compatible - qw(forwarded phishing junk notjunk)); # rarely supported -# note: RFC 8621 states "Users may add arbitrary keywords to an Email", -# but is it good idea? Stick to the system and reserved ones, for now. -# The widely-compatible ones map to IMAP system flags, Maildir flags -# and mbox Status/X-Status headers. -my %KW = map { $_ => 1 } @KW; -my $L_MAX = 244; # Xapian term limit - length('L') - -# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name" -# "Servers MAY reject names that violate server policy" -my %ERR = ( - L => sub { - my ($label) = @_; - length($label) >= $L_MAX and - return "`$label' too long (must be <= $L_MAX)"; - $label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ? - undef : "`$label' is invalid"; - }, - kw => sub { - my ($kw) = @_; - $KW{$kw} ? undef : < update_vmd -sub vmd_mod_extract { - my $argv = $_[-1]; - my $vmd_mod = {}; - my @new_argv; - for my $x (@$argv) { - if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) { - my ($op, $pfx, $val) = ($1, $2, $3); - if (my $err = $ERR{$pfx}->($val)) { - push @{$vmd_mod->{err}}, $err; - } else { # set "+kw", "+L", "-L", "-kw" - push @{$vmd_mod->{$op.$pfx}}, $val; - } - } else { - push @new_argv, $x; - } - } - @$argv = @new_argv; - $vmd_mod; -} - sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) { @@ -99,7 +47,7 @@ sub lei_tag { # the "lei tag" method $sto->write_prepare($lei); my $self = bless { missing => 0 }, __PACKAGE__; $lei->ale; # refresh and prepare - my $vmd_mod = vmd_mod_extract(\@argv); + my $vmd_mod = $self->vmd_mod_extract(\@argv); return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; $self->prepare_inputs($lei, \@argv) or return; grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or @@ -161,7 +109,7 @@ sub _complete_mark_common ($) { sub _complete_tag { my ($self, @argv) = @_; my @L = eval { $self->_lei_store->search->all_terms('L') }; - my @all = ((map { ("+kw:$_", "-kw:$_") } @KW), + my @all = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW), (map { ("+L:$_", "-L:$_") } @L)); return @all if !@argv; my ($cur, $re) = _complete_mark_common(\@argv); diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index ca1f3588..f066cc92 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -567,16 +567,39 @@ sub set_vmd { $self->{xdb}->replace_document($docid, $doc); } +sub apply_vmd_mod ($$) { + my ($doc, $vmd_mod) = @_; + my $updated = 0; + my @x = @VMD_MAP; + while (my ($field, $pfx) = splice(@x, 0, 2)) { + # field: "label" or "kw" + for my $val (@{$vmd_mod->{"-$field"} // []}) { + eval { + $doc->remove_term($pfx . $val); + ++$updated; + }; + } + for my $val (@{$vmd_mod->{"+$field"} // []}) { + $doc->add_boolean_term($pfx . $val); + ++$updated; + } + } + $updated; +} + sub add_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my @x = @VMD_MAP; + my $updated = 0; while (my ($field, $pfx) = splice(@x, 0, 2)) { my $add = $vmd->{$field} // next; $doc->add_boolean_term($pfx . $_) for @$add; + $updated += scalar(@$add); } - $self->{xdb}->replace_document($docid, $doc); + $updated += apply_vmd_mod($doc, $vmd); + $self->{xdb}->replace_document($docid, $doc) if $updated; } sub remove_vmd { @@ -601,21 +624,7 @@ sub update_vmd { my ($self, $docid, $vmd_mod) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; - my $updated = 0; - my @x = @VMD_MAP; - while (my ($field, $pfx) = splice(@x, 0, 2)) { - # field: "label" or "kw" - for my $val (@{$vmd_mod->{"-$field"} // []}) { - eval { - $doc->remove_term($pfx . $val); - ++$updated; - }; - } - for my $val (@{$vmd_mod->{"+$field"} // []}) { - $doc->add_boolean_term($pfx . $val); - ++$updated; - } - } + my $updated = apply_vmd_mod($doc, $vmd_mod); $self->{xdb}->replace_document($docid, $doc) if $updated; $updated; } diff --git a/t/lei-import.t b/t/lei-import.t index 8635df5a..6e9a853c 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -101,6 +101,16 @@ is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or lei_ok('blob', '--mail', $draft_b->[0]->{blob}); is($lei_out, $eml_str, 'draft retrieved by blob'); + +$eml_str = "Message-ID: \nSubject: label-this\n\n"; +lei_ok([qw(import -F eml - +kw:seen +L:inbox)], + undef, { %$lei_opt, 0 => \$eml_str }); +lei_ok(qw(q m:inbox@example.com)); +$res = json_utf8->decode($lei_out); +is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); +is_deeply($res->[0]->{L}, ['inbox'], 'label set'); + + # see t/lei_to_mail.t for "import -F mbox*" }); done_testing;