From cb2b5984109b2caad941e3a2c952219890079acc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 31 Dec 2020 13:51:38 +0000 Subject: lei_store: handle messages without Message-ID at all For personal mail, unsent draft messages are a common source of messages without Message-IDs. --- lib/PublicInbox/LeiStore.pm | 20 ++++++++++++++++---- lib/PublicInbox/OverIdx.pm | 2 ++ lib/PublicInbox/Smsg.pm | 6 ++---- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 43fddf6d..c8b9d75e 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -15,8 +15,8 @@ use PublicInbox::ExtSearchIdx; use PublicInbox::Import; use PublicInbox::InboxWritable; use PublicInbox::V2Writable; -use PublicInbox::ContentHash qw(content_hash); -use PublicInbox::MID qw(mids); +use PublicInbox::ContentHash qw(content_hash content_digest); +use PublicInbox::MID qw(mids mids_in); use PublicInbox::LeiSearch; use List::Util qw(max); @@ -107,14 +107,26 @@ sub eidx_init { $eidx; } +# when a message has no Message-IDs at all, this is needed for +# unsent Draft messages, at least +sub _fake_mid_for ($$) { + my ($eml, $dig) = @_; + my $mids = mids_in($eml, qw(X-Alt-Message-ID Resent-Message-ID)); + $eml->{-lei_fake_mid} = + $mids->[0] // PublicInbox::Import::digest2mid($dig, $eml); +} + sub _docids_for ($$) { my ($self, $eml) = @_; my %docids; - my $chash = content_hash($eml); + my $dig = content_digest($eml); + my $chash = $dig->clone->digest; my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $im = $self->{im}; - for my $mid (@{mids($eml)}) { + my $mids = mids($eml); + $mids->[0] //= _fake_mid_for($eml, $dig); + for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { my $oid = $cur->{blob}; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index bc2e3ef4..dad3966d 100644 --- a/lib/PublicInbox/OverIdx.pm +++
b/lib/PublicInbox/OverIdx.pm @@ -284,6 +284,8 @@ sub add_overview { $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); my $refs = parse_references($smsg, $eml, $mids); + $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid}; + $smsg->{mid} //= ''; my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index 14086538..9db10c64 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -69,7 +69,7 @@ sub psgi_cull ($) { $self; } -# for Import and v1 non-SQLite WWW code paths +# used for v2, Import and v1 non-SQLite WWW code paths sub populate { my ($self, $hdr, $sync) = @_; for my $f (qw(From To Cc Subject)) { @@ -100,9 +100,7 @@ sub populate { $self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ]; $self->{ds} //= $ds[0]; # no zone $self->{ts} //= $ts[0]; - - # for v1 users w/o SQLite - $self->{mid} //= eval { mids($hdr)->[0] } // ''; + $self->{mid} //= mids($hdr)->[0]; } # no strftime, that is locale-dependent and not for RFC822 -- cgit v1.2.3-24-ge0c7