user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] lei: hexdigest mocks account for unwanted headers
Date: Sat, 14 Aug 2021 00:29:44 +0000	[thread overview]
Message-ID: <20210814002944.5126-4-e@80x24.org> (raw)
In-Reply-To: <20210814002944.5126-1-e@80x24.org>

PublicInbox::Import never imports @UNWANTED_HEADERS, so ensure
our mock blob OIDs are computed without those headers, too.  This
ought to prevent duplicates if the PSGI mboxrd download starts
setting "X-Status: F", as "lei q -tt .." does.
---
 lib/PublicInbox/FakeImport.pm | 3 +++
 lib/PublicInbox/LEI.pm        | 5 +++++
 lib/PublicInbox/LeiQuery.pm   | 2 +-
 lib/PublicInbox/LeiRemote.pm  | 2 +-
 lib/PublicInbox/LeiStore.pm   | 9 ++++++++-
 lib/PublicInbox/LeiXSearch.pm | 3 +--
 6 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/FakeImport.pm b/lib/PublicInbox/FakeImport.pm
index dea25cbe..bccc3321 100644
--- a/lib/PublicInbox/FakeImport.pm
+++ b/lib/PublicInbox/FakeImport.pm
@@ -4,12 +4,15 @@
 # pretend to do PublicInbox::Import::add for "lei index"
 package PublicInbox::FakeImport;
 use strict;
+use v5.10.1;
 use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::Import;
 
 sub new { bless { bytes_added => 0 }, __PACKAGE__ }
 
 sub add {
 	my ($self, $eml, $check_cb, $smsg) = @_;
+	PublicInbox::Import::drop_unwanted_headers($eml);
 	$smsg->populate($eml);
 	my $raw = $eml->as_string;
 	$smsg->{blob} = git_sha(1, \$raw)->hexdigest;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 7d0f63dc..347dd280 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -1420,4 +1420,9 @@ sub refresh_watches {
 	}
 }
 
+sub git_blob_id {
+	my ($lei, $eml) = @_;
+	($lei->{sto} // _lei_store($lei, 1))->git_blob_id($eml);
+}
+
 1;
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 37b660f9..962ad49e 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -73,7 +73,7 @@ sub lxs_prepare {
 	my @only = @{$opt->{only} // []};
 	# --local is enabled by default unless --only is used
 	# we'll allow "--only $LOCATION --local"
-	my $sto = $self->_lei_store(1); # FIXME: should not create
+	my $sto = $self->_lei_store(1);
 	$self->{lse} = $sto->search;
 	if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
 		$lxs->prepare_external($self->{lse});
diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
index e7deecb8..580787c0 100644
--- a/lib/PublicInbox/LeiRemote.pm
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -32,7 +32,7 @@ sub _each_mboxrd_eml { # callback for MboxReader->mboxrd
 		$smsg = $res if ref($res) eq ref($smsg);
 	}
 	$smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
-				: git_sha(1, $eml)->hexdigest;
+				: $lei->git_blob_id($eml);
 	$smsg->populate($eml);
 	$smsg->{mid} //= '(none)';
 	push @{$self->{smsg}}, $smsg;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index ce66014f..3f33d114 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -20,7 +20,7 @@ use PublicInbox::Eml;
 use PublicInbox::Import;
 use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::V2Writable;
-use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::ContentHash qw(content_hash git_sha);
 use PublicInbox::MID qw(mids);
 use PublicInbox::LeiSearch;
 use PublicInbox::MDA;
@@ -508,4 +508,11 @@ sub write_prepare {
 	$lei->{sto} = $self;
 }
 
+# TODO: support SHA-256
+sub git_blob_id { # called via LEI->git_blob_id
+	my ($self, $eml) = @_;
+	$eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
+	git_sha(1, $eml)->hexdigest;
+}
+
 1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 971f3a06..5e34d864 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -274,8 +274,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
 			$smsg->{kw} = []; # short-circuit xsmsg_vmd
 		}
 	}
-	$smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
-				: git_sha(1, $eml)->hexdigest;
+	$smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_blob_id($eml);
 	_smsg_fill($smsg, $eml);
 	wait_startq($lei);
 	if ($lei->{-progress}) {

  parent reply	other threads:[~2021-08-14  0:29 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-14  0:29 [PATCH 0/3] lei: hopefully kill /Document \d+ not found/ errors Eric Wong
2021-08-14  0:29 ` [PATCH 1/3] lei: diagnostics for " Eric Wong
2021-08-14  0:29 ` [PATCH 2/3] lei <q|up>: wait on remote mboxrd imports synchronously Eric Wong
2021-08-14  0:29 ` Eric Wong [this message]
2021-08-24 20:14 ` [PATCH 0/3] lei: hopefully^W kill /Document \d+ not found/ errors Eric Wong
2021-10-14  5:31   ` Eric Wong
2021-10-15  9:52     ` [PATCH] lei q: avoid kw lookup failure on remote mboxrd Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210814002944.5126-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).