From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BA9271F9FF for ; Wed, 24 Feb 2021 23:37:18 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] lei q: auto-memoize remote messages into lei/store Date: Thu, 25 Feb 2021 05:37:18 +0600 Message-Id: <20210224233718.19007-3-e@80x24.org> In-Reply-To: <20210224233718.19007-1-e@80x24.org> References: <20210224233718.19007-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This lets users avoid network traffic on subsequent searches at the expense of local disk space. --no-import-remote may be specified to reverse this trade-off for users with little storage. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 2 ++ lib/PublicInbox/LeiQuery.pm | 1 + lib/PublicInbox/LeiXSearch.pm | 10 ++++--- t/lei-q-remote-import.t | 50 +++++++++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 t/lei-q-remote-import.t diff --git a/MANIFEST b/MANIFEST index 4c04eec8..adbd108f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -372,6 +372,7 @@ t/lei-import-maildir.t t/lei-import-nntp.t t/lei-import.t t/lei-mirror.t +t/lei-q-remote-import.t t/lei.t t/lei_dedupe.t t/lei_external.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 8cd95ac2..50665b3e 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -112,6 +112,7 @@ our %CMD = ( # sorted in order of importance/use: save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t augment|a sort|s=s reverse|r offset=i remote! local! external! pretty include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| + import-remote! alert=s@ mua=s no-torsocks torsocks=s verbose|v+ quiet|q C=s@), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], @@ -225,6 +226,7 @@ my %OPTDESC = ( 'whether or not to wrap git and curl commands with torsocks'], 'no-torsocks' => 'alias for --torsocks=no', 'save-as=s' => ['NAME', 'save a search terms by given name'], +'import-remote!' => 'do not memoize remote messages into local store', 'type=s' => [ 'any|mid|git', 'disambiguate type' ], diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 743fa3f7..b57d1cc5 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -51,6 +51,7 @@ sub lei_q { # we'll allow "--only $LOCATION --local" my $sto = $self->_lei_store(1); my $lse = $sto->search; + $sto->write_prepare($self) if $opt->{'import-remote'} //= 1; if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($lse); } diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index c46aba3b..2d399653 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -189,8 +189,9 @@ sub query_mset { # non-parallel for non-"--threads" users $lei->{ovv}->ovv_atexit_child($lei); } -sub each_eml { # callback for MboxReader->mboxrd +sub each_remote_eml { # callback for MboxReader->mboxrd my ($eml, $self, $lei, $each_smsg) = @_; + $lei->{sto}->ipc_do('set_eml', $eml) if $lei->{sto}; # --import-remote my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); $smsg->parse_references($eml, mids($eml)); @@ -244,14 +245,17 @@ sub query_remote_mboxrd { my ($fh, $pid) = popen_rd($cmd, undef, $rdr); $reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid); $fh = IO::Uncompress::Gunzip->new($fh); - PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self, + PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self, $lei, $each_smsg); my $err = waitpid($pid, 0) == $pid ? undef : "BUG: waitpid($cmd): $!"; @$reap_curl = (); # cancel OnDestroy die $err if $err; + my $nr = $lei->{-nr_remote_eml}; + if ($nr && $lei->{sto}) { + my $wait = $lei->{sto}->ipc_do('done'); + } if ($? == 0) { - my $nr = $lei->{-nr_remote_eml}; mset_progress($lei, $lei->{-current_url}, $nr, $nr); next; } diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t new file mode 100644 index 00000000..f73524cf --- /dev/null +++ b/t/lei-q-remote-import.t @@ -0,0 +1,50 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +require_git 2.6; +require_mods(qw(json DBD::SQLite Search::Xapian)); +use PublicInbox::MboxReader; +my ($ro_home, $cfg_path) = setup_public_inboxes; +my $sock = tcp_server; +my ($tmpdir, $for_destroy) = tmpdir; +my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; +my $env = { PI_CONFIG => $cfg_path }; +my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-httpd: $?"); +my $host_port = tcp_host_port($sock); +my $url = "http://$host_port/t2/"; +my $exp1 = [ eml_load('t/plack-qp.eml') ]; +my $exp2 = [ eml_load('t/iso-2202-jp.eml') ]; +my $slurp_emls = sub { + open my $fh, '<', $_[0] or BAIL_OUT "open: $!"; + my @eml; + PublicInbox::MboxReader->mboxrd($fh, sub { + my $eml = shift; + $eml->header_set('Status'); + push @eml, $eml; + }); + \@eml; +}; + +test_lei({ tmpdir => $tmpdir }, sub { + my $o = "$ENV{HOME}/o.mboxrd"; + my @cmd = ('q', '-o', "mboxrd:$o", 'm:qp@example.com'); + lei_ok(@cmd); + ok(-f $o && !-s _, 'output exists but is empty'); + unlink $o or BAIL_OUT $!; + lei_ok(@cmd, '-I', $url); + is_deeply($slurp_emls->($o), $exp1, 'got results after remote search'); + unlink $o or BAIL_OUT $!; + lei_ok(@cmd); + ok(-f $o && -s _, 'output exists after import but is not empty'); + is_deeply($slurp_emls->($o), $exp1, 'got results w/o remote search'); + unlink $o or BAIL_OUT $!; + + $cmd[-1] = 'm:199707281508.AAA24167@hoyogw.example'; + lei_ok(@cmd, '-I', $url, '--no-import-remote'); + is_deeply($slurp_emls->($o), $exp2, 'got another after remote search'); + unlink $o or BAIL_OUT $!; + lei_ok(@cmd); + ok(-f $o && !-s _, '--no-import-remote did not memoize'); +}); +done_testing;