From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 384941F9F3 for ; Sat, 14 Aug 2021 00:29:45 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] lei: hexdigest mocks account for unwanted headers Date: Sat, 14 Aug 2021 00:29:44 +0000 Message-Id: <20210814002944.5126-4-e@80x24.org> In-Reply-To: <20210814002944.5126-1-e@80x24.org> References: <20210814002944.5126-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: PublicInbox::Import never imports @UNWANTED_HEADERS, so ensure our mock blob OIDs do the same. This ought to prevent duplicates if the PSGI mboxrd download starts setting "X-Status: F" like "lei q -tt .." --- lib/PublicInbox/FakeImport.pm | 3 +++ lib/PublicInbox/LEI.pm | 5 +++++ lib/PublicInbox/LeiQuery.pm | 2 +- lib/PublicInbox/LeiRemote.pm | 2 +- lib/PublicInbox/LeiStore.pm | 9 ++++++++- lib/PublicInbox/LeiXSearch.pm | 3 +-- 6 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/FakeImport.pm b/lib/PublicInbox/FakeImport.pm index dea25cbe..bccc3321 100644 --- a/lib/PublicInbox/FakeImport.pm +++ b/lib/PublicInbox/FakeImport.pm @@ -4,12 +4,15 @@ # pretend to do PublicInbox::Import::add for "lei index" package PublicInbox::FakeImport; use strict; +use v5.10.1; use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::Import; sub new { bless { bytes_added => 0 }, __PACKAGE__ } sub add { my ($self, $eml, $check_cb, $smsg) = @_; + PublicInbox::Import::drop_unwanted_headers($eml); $smsg->populate($eml); my $raw = $eml->as_string; $smsg->{blob} = git_sha(1, \$raw)->hexdigest; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 7d0f63dc..347dd280 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -1420,4 +1420,9 @@ sub refresh_watches { } } +sub git_blob_id { + my ($lei, $eml) = @_; + ($lei->{sto} // _lei_store($lei, 1))->git_blob_id($eml); +} + 1; diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 37b660f9..962ad49e 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -73,7 +73,7 @@ sub lxs_prepare { my @only = @{$opt->{only} // []}; # --local is enabled by default unless --only is used # we'll allow "--only $LOCATION --local" - my $sto = $self->_lei_store(1); # FIXME: should not create + my $sto = $self->_lei_store(1); $self->{lse} = $sto->search; if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($self->{lse}); diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm index e7deecb8..580787c0 100644 --- a/lib/PublicInbox/LeiRemote.pm +++ b/lib/PublicInbox/LeiRemote.pm @@ -32,7 +32,7 @@ sub _each_mboxrd_eml { # callback for MboxReader->mboxrd $smsg = $res if ref($res) eq ref($smsg); } $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] - : git_sha(1, $eml)->hexdigest; + : $lei->git_blob_id($eml); $smsg->populate($eml); $smsg->{mid} //= '(none)'; push @{$self->{smsg}}, $smsg; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index ce66014f..3f33d114 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -20,7 +20,7 @@ use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; -use PublicInbox::ContentHash qw(content_hash); +use PublicInbox::ContentHash qw(content_hash git_sha); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; use PublicInbox::MDA; @@ -508,4 +508,11 @@ sub write_prepare { $lei->{sto} = $self; } +# TODO: support SHA-256 +sub git_blob_id { # called via LEI->git_blob_id + my ($self, $eml) = @_; + $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS; + git_sha(1, $eml)->hexdigest; +} + 1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 971f3a06..5e34d864 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -274,8 +274,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd $smsg->{kw} = []; # short-circuit xsmsg_vmd } } - $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] - : git_sha(1, $eml)->hexdigest; + $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_blob_id($eml); _smsg_fill($smsg, $eml); wait_startq($lei); if ($lei->{-progress}) {