From f0731b9b653640afd8078aed994814b3049f230a Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 25 Feb 2021 05:37:18 +0600
Subject: lei q: auto-memoize remote messages into lei/store

This lets users avoid network traffic on subsequent searches
at the expense of local disk space. --no-import-remote may be
specified to reverse this trade-off for users with little
storage.
---
 lib/PublicInbox/LEI.pm        |  2 ++
 lib/PublicInbox/LeiQuery.pm   |  1 +
 lib/PublicInbox/LeiXSearch.pm | 10 +++++++---
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 8cd95ac2..50665b3e 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -112,6 +112,7 @@ our %CMD = ( # sorted in order of importance/use:
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t augment|a
 	sort|s=s reverse|r offset=i remote! local! external! pretty
 	include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin|
+	import-remote!
 	alert=s@ mua=s no-torsocks torsocks=s verbose|v+ quiet|q C=s@),
 	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
 
@@ -225,6 +226,7 @@ my %OPTDESC = (
 		'whether or not to wrap git and curl commands with torsocks'],
 'no-torsocks' => 'alias for --torsocks=no',
 'save-as=s' => ['NAME', 'save a search terms by given name'],
+'import-remote!' => 'do not memoize remote messages into local store',
 
 'type=s' => [ 'any|mid|git', 'disambiguate type' ],
 
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 743fa3f7..b57d1cc5 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -51,6 +51,7 @@ sub lei_q {
 	# we'll allow "--only $LOCATION --local"
 	my $sto = $self->_lei_store(1);
 	my $lse = $sto->search;
+	$sto->write_prepare($self) if $opt->{'import-remote'} //= 1;
 	if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
 		$lxs->prepare_external($lse);
 	}
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index c46aba3b..2d399653 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -189,8 +189,9 @@ sub query_mset { # non-parallel for non-"--threads" users
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
-sub each_eml { # callback for MboxReader->mboxrd
+sub each_remote_eml { # callback for MboxReader->mboxrd
 	my ($eml, $self, $lei, $each_smsg) = @_;
+	$lei->{sto}->ipc_do('set_eml', $eml) if $lei->{sto}; # --import-remote
 	my $smsg = bless {}, 'PublicInbox::Smsg';
 	$smsg->populate($eml);
 	$smsg->parse_references($eml, mids($eml));
@@ -244,14 +245,17 @@ sub query_remote_mboxrd {
 		my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
 		$reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid);
 		$fh = IO::Uncompress::Gunzip->new($fh);
-		PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self,
+		PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
 						$lei, $each_smsg);
 		my $err = waitpid($pid, 0) == $pid ? undef
 						: "BUG: waitpid($cmd): $!";
 		@$reap_curl = (); # cancel OnDestroy
 		die $err if $err;
+		my $nr = $lei->{-nr_remote_eml};
+		if ($nr && $lei->{sto}) {
+			my $wait = $lei->{sto}->ipc_do('done');
+		}
 		if ($? == 0) {
-			my $nr = $lei->{-nr_remote_eml};
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
 			next;
 		}
-- 
cgit v1.2.3-24-ge0c7