# lib/PublicInbox/LeiExportKw.pm -- front-end for the "lei export-kw"
# sub-command (Maildir keyword export)

# LeiMailSync->each_src callback.  Renames one Maildir message, using
# link(2) + unlink(2), so the flag suffix of its filename reflects the
# keywords stored for the blob, then records the new basename through
# $self->{lms}->mv_src.
#
#   $oidbin - binary object ID; hex-encoded here for the keyword lookup
#             and for the error message
#   $id     - reference to the current basename of the message
#   $self   - object providing {lse}, {lms}, {lei} and the optional
#             {-merge_kw} flag
#   $mdir   - Maildir path; sources are looked up in its new/ and cur/
#
# Returns nothing meaningful.  Does nothing when oid_keywords() returns
# a false value for the blob.  link/unlink failures are reported via
# $self->{lei}->child_error; a false return from mv_src is fatal (die).
sub export_kw_md {
    my ($oidbin, $id, $self, $mdir) = @_;
    my $oidhex = unpack('H*', $oidbin);
    my $sto_kw = $self->{lse}->oid_keywords($oidhex) or return;
    my $bn = $$id;
    my ($md_kw, $unknown, @try);
    if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
        # the name already has a ":2," suffix: try cur/ before new/
        ($md_kw, $unknown) = PublicInbox::MdirReader::flags2kw($1);
        @try = qw(cur new);
    } else {
        $unknown = [];
        @try = qw(new cur);
    }
    if ($self->{-merge_kw} && $md_kw) { # merging keywords is the default
        @$sto_kw{keys %$md_kw} = values(%$md_kw);
    }
    $bn .= ':2,'.
        PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown);
    my $dst = "$mdir/cur/$bn";
    my (@fail, $link_err);
    for my $d (@try) {
        my $src = "$mdir/$d/$$id";
        next if $src eq $dst;

        # we use link(2) + unlink(2) since rename(2) may
        # inadvertently clobber if the "uniquefilename" part wasn't
        # actually unique.
        if (link($src, $dst)) { # success
            # unlink(2) may ENOENT from parallel invocation,
            # ignore it, but not other serious errors
            if (!unlink($src) and $! != ENOENT) {
                $self->{lei}->child_error(1,
                    "E: unlink($src): $!");
            }
            $self->{lms}->mv_src("maildir:$mdir",
                $oidbin, $id, $bn) or die;
            return; # success anyways if link(2) worked
        }

        # Save errno right away: the -e test and clear_src below may
        # overwrite $! before the failure is reported.
        my $err = $!;
        if ($err == ENOENT && !-e $src) { # some other process moved it
            $self->{lms}->clear_src("maildir:$mdir", $id);
            next;
        }
        next if $err == EEXIST;
        push @fail, $src;
        $link_err = $err;
    }
    return unless @fail;

    # every attempted link(2) failed; report the errno of the last
    # recorded failure
    my $orig = '['.join('|', @fail).']';
    $self->{lei}->child_error(1,
        "link($orig, $dst) ($oidhex): $link_err");
}

# overrides PublicInbox::LeiInput::input_path_url
#
# Only inputs starting with "maildir:" (matched case-insensitively) are
# acted on: every source known for that folder is passed to
# export_kw_md together with $self and the bare Maildir path.  The work
# is bracketed by lms_begin / lms_commit on $self->{lms}, which is
# initialized from $self->{lse}->lms on first use.
sub input_path_url {
    my ($self, $input) = @_;
    my $lms = $self->{lms} //= $self->{lse}->lms;
    $lms->lms_begin;
    if ($input =~ s/\Amaildir://i) {
        require PublicInbox::LeiToMail; # kw2suffix
        $lms->each_src("maildir:$input", \&export_kw_md, $self, $input);
    }
    $lms->lms_commit;
}
# lib/PublicInbox/LeiExportKw.pm

# Feeds every folder reported by the mail sync object through the matcher
# returned by $lei->complete_url_prepare(\@argv) and returns the combined
# results.  Returns early when there is no store or no mail sync object.
sub _complete_export_kw {
    my ($lei, @argv) = @_;
    my $store = $lei->_lei_store or return;
    my $sync = $store->search->lms or return;
    my $matcher = $lei->complete_url_prepare(\@argv);
    my @hits;
    push @hits, $matcher->($_) for $sync->folders;
    @hits;
}

no warnings 'once';

# reuse the implementations provided by PublicInbox::LeiInput
*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

# the following works even when LeiAuth is lazy-loaded
*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;

# lib/PublicInbox/LeiMailSync.pm

# Maildir-only: update the basename stored in blob2name for one message.
#   $folder - folder name; its fid is cached in $self->{fmap} and looked
#             up with _fid_for on a cache miss
#   $oidbin - object ID column value to match
#   $id     - reference to the old basename
#   $newbn  - basename to store
# Returns the result of the statement handle's execute().
sub mv_src {
    my ($self, $folder, $oidbin, $id, $newbn) = @_;
    my $fid = ($self->{fmap}{$folder} //= _fid_for($self, $folder, 1));
    my $sql = <<'SQL';
UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?
SQL
    my $update = $self->{dbh}->prepare_cached($sql);
    $update->execute($newbn, $fid, $oidbin, $$id);
}
# lib/PublicInbox/LeiSearch.pm

# Collects the 'K'-prefixed terms of every document found for a blob.
#   $oidhex - hexadecimal object ID
# Returns a hash reference built from the xap_terms('K', ...) results;
# returns undef (empty list in list context) if the blob is unknown.
sub oid_keywords {
    my ($self, $oidhex) = @_;
    my @num = $self->over->blob_exists($oidhex) or return;
    my $xdb = $self->xdb; # set {nshard};
    my %kw;
    for my $num (@num) { # there should only be one...
        my $doc = $xdb->get_document(num2docid($self, $num));
        my $terms = xap_terms('K', $doc);
        @kw{keys %$terms} = values %$terms;
    }
    \%kw;
}

# lib/PublicInbox/LeiToMail.pm

# Builds the flag string that follows ":2," in a Maildir filename.
#   $kw - array reference of keyword names; each is translated through
#         %kw2char (defined outside this excerpt) and names without an
#         entry are skipped
#   @_  - optional extra flag characters, appended verbatim
# Returns the translated and extra characters, sorted and joined.
#
# The map is parenthesized on purpose: without the parentheses the extra
# characters were also looked up in %kw2char and silently dropped.
sub kw2suffix ($;@) {
    my $kw = shift;
    join('', sort((map { $kw2char{$_} // () } @$kw), @_));
}

# lib/PublicInbox/MdirReader.pm

# Splits a Maildir flag string into known keywords and unknown flags.
#   $_[0] - flag characters (the part after ":2,")
# Returns two references: a hash whose keys are the keywords found via
# %c2kw (defined outside this excerpt), and an array of the characters
# that have no %c2kw entry, in their original order.
sub flags2kw ($) {
    my (%kw, @unknown);
    for my $c (split(//, $_[0])) {
        my $k = $c2kw{$c};
        if (defined($k)) {
            $kw{$k} = 1;
        } else {
            push @unknown, $c;
        }
    }
    (\%kw, \@unknown);
}