From ea9b01fc63f864435370d3bc9cec674ce03e887e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 14 Aug 2021 00:29:44 +0000 Subject: lei: hexdigest mocks account for unwanted headers PublicInbox::Import never imports @UNWANTED_HEADERS, so ensure our mock blob OIDs do the same. This ought to prevent duplicates if the PSGI mboxrd download starts setting "X-Status: F" like "lei q -tt .." --- lib/PublicInbox/FakeImport.pm | 3 +++ lib/PublicInbox/LEI.pm | 5 +++++ lib/PublicInbox/LeiQuery.pm | 2 +- lib/PublicInbox/LeiRemote.pm | 2 +- lib/PublicInbox/LeiStore.pm | 9 ++++++++- lib/PublicInbox/LeiXSearch.pm | 3 +-- 6 files changed, 19 insertions(+), 5 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/FakeImport.pm b/lib/PublicInbox/FakeImport.pm index dea25cbe..bccc3321 100644 --- a/lib/PublicInbox/FakeImport.pm +++ b/lib/PublicInbox/FakeImport.pm @@ -4,12 +4,15 @@ # pretend to do PublicInbox::Import::add for "lei index" package PublicInbox::FakeImport; use strict; +use v5.10.1; use PublicInbox::ContentHash qw(git_sha); +use PublicInbox::Import; sub new { bless { bytes_added => 0 }, __PACKAGE__ } sub add { my ($self, $eml, $check_cb, $smsg) = @_; + PublicInbox::Import::drop_unwanted_headers($eml); $smsg->populate($eml); my $raw = $eml->as_string; $smsg->{blob} = git_sha(1, \$raw)->hexdigest; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 7d0f63dc..347dd280 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -1420,4 +1420,9 @@ sub refresh_watches { } } +sub git_blob_id { + my ($lei, $eml) = @_; + ($lei->{sto} // _lei_store($lei, 1))->git_blob_id($eml); +} + 1; diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 37b660f9..962ad49e 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -73,7 +73,7 @@ sub lxs_prepare { my @only = @{$opt->{only} // []}; # --local is enabled by default unless --only is used # we'll allow "--only $LOCATION --local" - my $sto = $self->_lei_store(1); # FIXME: should not create + my $sto = $self->_lei_store(1); $self->{lse} = $sto->search; if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($self->{lse}); diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm index e7deecb8..580787c0 100644 --- a/lib/PublicInbox/LeiRemote.pm +++ b/lib/PublicInbox/LeiRemote.pm @@ -32,7 +32,7 @@ sub _each_mboxrd_eml { # callback for MboxReader->mboxrd $smsg = $res if ref($res) eq ref($smsg); } $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] - : git_sha(1, $eml)->hexdigest; + : $lei->git_blob_id($eml); $smsg->populate($eml); $smsg->{mid} //= '(none)'; push @{$self->{smsg}}, $smsg; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index ce66014f..3f33d114 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -20,7 +20,7 @@ use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; -use PublicInbox::ContentHash qw(content_hash); +use PublicInbox::ContentHash qw(content_hash git_sha); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; use PublicInbox::MDA; @@ -508,4 +508,11 @@ sub write_prepare { $lei->{sto} = $self; } +# TODO: support SHA-256 +sub git_blob_id { # called via LEI->git_blob_id + my ($self, $eml) = @_; + $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS; + git_sha(1, $eml)->hexdigest; +} + 1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 971f3a06..5e34d864 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -274,8 +274,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd $smsg->{kw} = []; # short-circuit xsmsg_vmd } } - $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] - : git_sha(1, $eml)->hexdigest; + $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_blob_id($eml); _smsg_fill($smsg, $eml); wait_startq($lei); if ($lei->{-progress}) { -- cgit v1.2.3-24-ge0c7