From 40f3f2a2c805fc37c7ed35a60948856bd962b493 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 4 May 2021 09:49:12 +0000 Subject: lei index: new command to index mail w/o git storage Since completely purging blobs from git is slow, users may wish to index messages in Maildirs (and eventually other local storage) without storing data in git. Much code from LeiImport and LeiInput is reused, and a new dummy FakeImport class supplies a non-storing $im->add and minimizes changes to LeiStore. The tricky part of this command is to support "lei import" after a message has gone through "lei index". Relying on $smsg->{bytes} == 0 (as we do for external-only vmd storage) does not work here, since it would break searching for "z:" byte-ranges when not using externals. This eventually required PublicInbox::Import::add to use a SharedKV to keep track of imported blobs and prevent duplication. --- lib/PublicInbox/LeiInput.pm | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'lib/PublicInbox/LeiInput.pm') diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 85caac35..46eea111 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -1,7 +1,7 @@ # Copyright (C) 2021 all contributors # License: AGPL-3.0+ -# parent class for LeiImport, LeiConvert +# parent class for LeiImport, LeiConvert, LeiIndex package PublicInbox::LeiInput; use strict; use v5.10.1; @@ -93,11 +93,7 @@ sub handle_http_input ($$@) { my ($fh, $pid) = popen_rd($cmd, undef, $rdr); grep(/\A--compressed\z/, @$curl) or $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); - eval { - PublicInbox::MboxReader->mboxrd($fh, - $self->can('input_mbox_cb'), - $self, @args); - }; + eval { $self->input_fh('mboxrd', $fh, $url, @args) }; my $err = $@; waitpid($pid, 0); $? 
|| $err and @@ -221,14 +217,8 @@ sub prepare_inputs { # returns undef on error require PublicInbox::NetReader; $net //= PublicInbox::NetReader->new; $net->add_url($input); - if ($sync) { - if ($input =~ m!\Aimaps?://!) { - push @{$sync->{ok}}, $input; - } else { - push @{$sync->{no}}, $input; - } - } - } elsif ($input_path =~ m!\Ahttps?://!i) { + push @{$sync->{ok}}, $input if $sync; + } elsif ($input_path =~ m!\Ahttps?://!i) { # mboxrd.gz # TODO: how would we detect r/w JMAP? push @{$sync->{no}}, $input if $sync; prepare_http_input($self, $lei, $input_path) or return; @@ -239,12 +229,10 @@ sub prepare_inputs { # returns undef on error --in-format=$in_fmt and `$ifmt:' conflict } - if ($sync) { - if ($ifmt =~ /\A(?:maildir|mh)\z/i) { - push @{$sync->{ok}}, $input; - } else { - push @{$sync->{no}}, $input; - } + if ($ifmt =~ /\A(?:maildir|mh)\z/i) { + push @{$sync->{ok}}, $input if $sync; + } else { + push @{$sync->{no}}, $input if $sync; } my $devfd = $lei->path_to_fd($input_path) // return; if ($devfd >= 0 || (-f $input_path || -p _)) { @@ -260,7 +248,7 @@ sub prepare_inputs { # returns undef on error } else { return $lei->fail("Unable to handle $input"); } - } elsif ($input =~ /\.(eml|patch)\z/i && -f $input) { + } elsif ($input =~ /\.(?:eml|patch)\z/i && -f $input) { lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<""); $input is `eml', not --in-format=$in_fmt -- cgit v1.2.3-24-ge0c7