From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C3E781FFA8 for ; Thu, 31 Dec 2020 13:51:56 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 20/36] lei_store: handle messages without Message-ID at all Date: Thu, 31 Dec 2020 13:51:38 +0000 Message-Id: <20201231135154.6070-21-e@80x24.org> In-Reply-To: <20201231135154.6070-1-e@80x24.org> References: <20201231135154.6070-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: For personal mail, unsent draft messages are a common source of messages without Message-IDs. --- lib/PublicInbox/LeiStore.pm | 20 ++++++++++++++++---- lib/PublicInbox/OverIdx.pm | 2 ++ lib/PublicInbox/Smsg.pm | 6 ++---- t/lei_store.t | 24 ++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 43fddf6d..c8b9d75e 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -15,8 +15,8 @@ use PublicInbox::ExtSearchIdx; use PublicInbox::Import; use PublicInbox::InboxWritable; use PublicInbox::V2Writable; -use PublicInbox::ContentHash qw(content_hash); -use PublicInbox::MID qw(mids); +use PublicInbox::ContentHash qw(content_hash content_digest); +use PublicInbox::MID qw(mids mids_in); use PublicInbox::LeiSearch; use List::Util qw(max); @@ -107,14 +107,26 @@ sub eidx_init { $eidx; } +# when a message has no Message-IDs at all, this is needed for +# unsent Draft messages, at least +sub _fake_mid_for ($$) { + my ($eml, $dig) = @_; + my $mids = mids_in($eml, qw(X-Alt-Message-ID Resent-Message-ID)); + $eml->{-lei_fake_mid} = + $mids->[0] // PublicInbox::Import::digest2mid($dig, $eml); +} + sub 
_docids_for ($$) { my ($self, $eml) = @_; my %docids; - my $chash = content_hash($eml); + my $dig = content_digest($eml); + my $chash = $dig->clone->digest; my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $im = $self->{im}; - for my $mid (@{mids($eml)}) { + my $mids = mids($eml); + $mids->[0] //= _fake_mid_for($eml, $dig); + for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { my $oid = $cur->{blob}; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index bc2e3ef4..dad3966d 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -284,6 +284,8 @@ sub add_overview { $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); my $refs = parse_references($smsg, $eml, $mids); + $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid}; + $smsg->{mid} //= ''; my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index 14086538..9db10c64 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -69,7 +69,7 @@ sub psgi_cull ($) { $self; } -# for Import and v1 non-SQLite WWW code paths +# used for v2, Import and v1 non-SQLite WWW code paths sub populate { my ($self, $hdr, $sync) = @_; for my $f (qw(From To Cc Subject)) { @@ -100,9 +100,7 @@ sub populate { $self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ]; $self->{ds} //= $ds[0]; # no zone $self->{ts} //= $ts[0]; - - # for v1 users w/o SQLite - $self->{mid} //= eval { mids($hdr)->[0] } // ''; + $self->{mid} //= mids($hdr)->[0]; } # no strftime, that is locale-dependent and not for RFC822 diff --git a/t/lei_store.t b/t/lei_store.t index bc0d66c2..beb5a8c4 100644 --- a/t/lei_store.t +++ b/t/lei_store.t @@ -100,6 +100,30 @@ for my $parallel (0, 1) { SKIP: { require_mods(qw(Storable), 1); ok($lst->can('ipc_do'), 'ipc_do works if we have Storable'); + $eml->header_set('Message-ID', ''); + my $pid = 
$lst->ipc_worker_spawn('lei-store'); + ok($pid > 0, 'got a worker'); + my $smsg = $lst->ipc_do('set_eml', $eml, qw(seen)); + is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc'); + my $ids = $lst->ipc_do('set_eml', $eml, qw(seen)); + is_deeply($ids, [ $smsg->{num} ], 'docid returned'); + + $eml->header_set('Message-ID'); + my $no_mid = $lst->ipc_do('set_eml', $eml, qw(seen)); + my $wait = $lst->ipc_do('done'); + my @kw = $lst->search->msg_keywords($no_mid->{num}); + is_deeply(\@kw, [qw(seen)], 'ipc set changed kw'); + + is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc'); + $ids = $lst->ipc_do('set_eml', $eml, qw(seen)); + is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc'); + $lst->ipc_do('done'); + $lst->ipc_worker_stop; + $ids = $lst->ipc_do('set_eml', $eml, qw(seen answered)); + is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc'); + $wait = $lst->ipc_do('done'); + @kw = $lst->search->msg_keywords($no_mid->{num}); + is_deeply(\@kw, [qw(answered seen)], 'set changed kw w/o ipc'); } done_testing;