From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 63F821F9FF for ; Sun, 21 Mar 2021 09:50:47 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/3] lei q: fix warning on remote imports Date: Sun, 21 Mar 2021 15:50:46 +0600 Message-Id: <20210321095047.13855-3-e@80x24.org> In-Reply-To: <20210321095047.13855-1-e@80x24.org> References: <20210321095047.13855-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This will let us tie keywords from remote externals to those which only exist in local externals. --- lib/PublicInbox/ContentHash.pm | 15 ++++++++++++--- lib/PublicInbox/LeiDedupe.pm | 9 ++------- lib/PublicInbox/LeiXSearch.pm | 6 +++++- t/lei-q-remote-import.t | 3 ++- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm index 4dbe7b50..112b1ea6 100644 --- a/lib/PublicInbox/ContentHash.pm +++ b/lib/PublicInbox/ContentHash.pm @@ -8,9 +8,9 @@ # See L manpage for more details. package PublicInbox::ContentHash; use strict; -use warnings; -use base qw/Exporter/; -our @EXPORT_OK = qw/content_hash content_digest/; +use v5.10.1; +use parent qw(Exporter); +our @EXPORT_OK = qw(content_hash content_digest git_sha); use PublicInbox::MID qw(mids references); use PublicInbox::MsgIter; @@ -94,4 +94,13 @@ sub content_hash ($) { content_digest($_[0])->digest; } +sub git_sha ($$) { + my ($n, $eml) = @_; + my $dig = Digest::SHA->new($n); + my $buf = $eml->as_string; + $dig->add('blob '.length($buf)."\0"); + $dig->add($buf); + $dig; +} + 1; diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index 5fec9384..a62b3a7c 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -3,7 +3,7 @@ package PublicInbox::LeiDedupe; use strict; use v5.10.1; -use PublicInbox::ContentHash qw(content_hash); +use PublicInbox::ContentHash qw(content_hash git_sha); use Digest::SHA (); # n.b. mutt sets most of these headers not sure about Bytes @@ -18,12 +18,7 @@ sub _regen_oid ($) { push @stash, [ $k, \@v ]; $eml->header_set($k); # restore below } - my $dig = Digest::SHA->new(1); # XXX SHA256 later - my $buf = $eml->as_string; - $dig->add('blob '.length($buf)."\0"); - $dig->add($buf); - undef $buf; - + my $dig = git_sha(1, $eml); for my $kv (@stash) { # restore stashed headers my ($k, @v) = @$kv; $eml->header_set($k, @v); diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 17171a7f..b6aaf3e1 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -18,6 +18,7 @@ use PublicInbox::MID qw(mids); use PublicInbox::Smsg; use PublicInbox::Eml; use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR); +use PublicInbox::ContentHash qw(git_sha); sub new { my ($class) = @_; @@ -207,10 +208,13 @@ sub query_mset { # non-parallel for non-"--threads" users sub each_remote_eml { # callback for MboxReader->mboxrd my ($eml, $self, $lei, $each_smsg) = @_; - if ($self->{import_sto} && !$lei->{ale}->xoids_for($eml, 1)) { + my $xoids = $lei->{ale}->xoids_for($eml, 1); + if ($self->{import_sto} && !$xoids) { $self->{import_sto}->ipc_do('add_eml', $eml); } my $smsg = bless {}, 'PublicInbox::Smsg'; + $smsg->{blob} = $xoids ? (keys(%$xoids))[0] + : git_sha(1, $eml)->hexdigest; $smsg->populate($eml); $smsg->parse_references($eml, mids($eml)); $smsg->{$_} //= '' for qw(from to cc ds subject references mid); diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t index 25e461ac..93828a24 100644 --- a/t/lei-q-remote-import.t +++ b/t/lei-q-remote-import.t @@ -65,8 +65,9 @@ test_lei({ tmpdir => $tmpdir }, sub { $im->add(eml_load('t/utf8.eml')) or BAIL_OUT '->add'; }; lei_ok(qw(add-external -q), $ibx->{inboxdir}); - lei_ok(qw(q -o), "mboxrd:$o", '--only', $url, + lei_ok(qw(q -q -o), "mboxrd:$o", '--only', $url, 'm:testmessage@example.com'); + is($lei_err, '', 'no warnings or errors'); ok(-s $o, 'got result from remote external'); my $exp = eml_load('t/utf8.eml'); is_deeply($slurp_emls->($o), [$exp], 'got expected result');