From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 20/36] lei_store: handle messages without Message-ID at all
Date: Thu, 31 Dec 2020 13:51:38 +0000 [thread overview]
Message-ID: <20201231135154.6070-21-e@80x24.org> (raw)
In-Reply-To: <20201231135154.6070-1-e@80x24.org>
For personal mail, unsent draft messages are a common source of
messages without Message-IDs.
---
lib/PublicInbox/LeiStore.pm | 20 ++++++++++++++++----
lib/PublicInbox/OverIdx.pm | 2 ++
lib/PublicInbox/Smsg.pm | 6 ++----
t/lei_store.t | 24 ++++++++++++++++++++++++
4 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 43fddf6d..c8b9d75e 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -15,8 +15,8 @@ use PublicInbox::ExtSearchIdx;
use PublicInbox::Import;
use PublicInbox::InboxWritable;
use PublicInbox::V2Writable;
-use PublicInbox::ContentHash qw(content_hash);
-use PublicInbox::MID qw(mids);
+use PublicInbox::ContentHash qw(content_hash content_digest);
+use PublicInbox::MID qw(mids mids_in);
use PublicInbox::LeiSearch;
use List::Util qw(max);
@@ -107,14 +107,26 @@ sub eidx_init {
$eidx;
}
+# picks a fallback Message-ID when a message has no Message-IDs at all;
+# this is needed for unsent Draft messages, at least
+sub _fake_mid_for ($$) {
+ my ($eml, $dig) = @_;
+ my $mids = mids_in($eml, qw(X-Alt-Message-ID Resent-Message-ID));
+ $eml->{-lei_fake_mid} =
+ $mids->[0] // PublicInbox::Import::digest2mid($dig, $eml);
+}
+
sub _docids_for ($$) {
my ($self, $eml) = @_;
my %docids;
- my $chash = content_hash($eml);
+ my $dig = content_digest($eml);
+ my $chash = $dig->clone->digest;
my $eidx = eidx_init($self);
my $oidx = $eidx->{oidx};
my $im = $self->{im};
- for my $mid (@{mids($eml)}) {
+ my $mids = mids($eml);
+ $mids->[0] //= _fake_mid_for($eml, $dig);
+ for my $mid (@$mids) {
my ($id, $prev);
while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) {
my $oid = $cur->{blob};
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index bc2e3ef4..dad3966d 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -284,6 +284,8 @@ sub add_overview {
$smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
my $mids = mids_for_index($eml);
my $refs = parse_references($smsg, $eml, $mids);
+ $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid};
+ $smsg->{mid} //= '';
my $subj = $smsg->{subject};
my $xpath;
if ($subj ne '') {
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 14086538..9db10c64 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -69,7 +69,7 @@ sub psgi_cull ($) {
$self;
}
-# for Import and v1 non-SQLite WWW code paths
+# used for v2, Import and v1 non-SQLite WWW code paths
sub populate {
my ($self, $hdr, $sync) = @_;
for my $f (qw(From To Cc Subject)) {
@@ -100,9 +100,7 @@ sub populate {
$self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
$self->{ds} //= $ds[0]; # no zone
$self->{ts} //= $ts[0];
-
- # for v1 users w/o SQLite
- $self->{mid} //= eval { mids($hdr)->[0] } // '';
+ $self->{mid} //= mids($hdr)->[0];
}
# no strftime, that is locale-dependent and not for RFC822
diff --git a/t/lei_store.t b/t/lei_store.t
index bc0d66c2..beb5a8c4 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -100,6 +100,30 @@ for my $parallel (0, 1) {
SKIP: {
require_mods(qw(Storable), 1);
ok($lst->can('ipc_do'), 'ipc_do works if we have Storable');
+ $eml->header_set('Message-ID', '<ipc-test@example>');
+ my $pid = $lst->ipc_worker_spawn('lei-store');
+ ok($pid > 0, 'got a worker');
+ my $smsg = $lst->ipc_do('set_eml', $eml, qw(seen));
+ is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc');
+ my $ids = $lst->ipc_do('set_eml', $eml, qw(seen));
+ is_deeply($ids, [ $smsg->{num} ], 'docid returned');
+
+ $eml->header_set('Message-ID');
+ my $no_mid = $lst->ipc_do('set_eml', $eml, qw(seen));
+ my $wait = $lst->ipc_do('done');
+ my @kw = $lst->search->msg_keywords($no_mid->{num});
+ is_deeply(\@kw, [qw(seen)], 'ipc set changed kw');
+
+ is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc');
+ $ids = $lst->ipc_do('set_eml', $eml, qw(seen));
+ is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc');
+ $lst->ipc_do('done');
+ $lst->ipc_worker_stop;
+ $ids = $lst->ipc_do('set_eml', $eml, qw(seen answered));
+ is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc');
+ $wait = $lst->ipc_do('done');
+ @kw = $lst->search->msg_keywords($no_mid->{num});
+ is_deeply(\@kw, [qw(answered seen)], 'set changed kw w/o ipc');
}
done_testing;
next prev parent reply other threads:[~2020-12-31 13:51 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-31 13:51 [PATCH 00/36] another round of lei stuff Eric Wong
2020-12-31 13:51 ` [PATCH 01/36] import: respect init.defaultBranch Eric Wong
2020-12-31 13:51 ` [PATCH 02/36] lei_store: use per-machine refname as git HEAD Eric Wong
2020-12-31 13:51 ` [PATCH 03/36] revert "lei_store: use per-machine refname as git HEAD" Eric Wong
2020-12-31 13:51 ` [PATCH 04/36] lei_to_mail: initial implementation for writing mbox formats Eric Wong
2020-12-31 13:51 ` [PATCH 05/36] sharedkv: fork()-friendly key-value store Eric Wong
2020-12-31 13:51 ` [PATCH 06/36] sharedkv: split out index_values Eric Wong
2020-12-31 13:51 ` [PATCH 07/36] lei_to_mail: start atomic and compressed mbox writing Eric Wong
2020-12-31 13:51 ` [PATCH 08/36] mboxreader: new class for reading various mbox formats Eric Wong
2020-12-31 13:51 ` [PATCH 09/36] lei_to_mail: start --augment, dedupe, bz2 and xz Eric Wong
2020-12-31 13:51 ` [PATCH 10/36] lei: implement various deduplication strategies Eric Wong
2020-12-31 13:51 ` [PATCH 11/36] lei_to_mail: lazy-require LeiDedupe Eric Wong
2020-12-31 13:51 ` [PATCH 12/36] lei_to_mail: support for non-seekable outputs Eric Wong
2020-12-31 13:51 ` [PATCH 13/36] lei_to_mail: support Maildir, fix+test --augment Eric Wong
2020-12-31 13:51 ` [PATCH 14/36] ipc: generic IPC dispatch based on Storable Eric Wong
2020-12-31 13:51 ` [PATCH 15/36] ipc: support Sereal Eric Wong
2020-12-31 13:51 ` [PATCH 16/36] lei_store: add ->set_eml, ->add_eml can return smsg Eric Wong
2020-12-31 13:51 ` [PATCH 17/36] lei: rename "extinbox" => "external" Eric Wong
2020-12-31 13:51 ` [PATCH 18/36] mid: use defined-or with `push' for uniqueness check Eric Wong
2020-12-31 13:51 ` [PATCH 19/36] mid: hoist out mids_in sub Eric Wong
2020-12-31 13:51 ` Eric Wong [this message]
2020-12-31 13:51 ` [PATCH 21/36] ipc: use shutdown(2), base atfork* callback Eric Wong
2020-12-31 13:51 ` [PATCH 22/36] lei_to_mail: unlink mboxes if not augmenting Eric Wong
2020-12-31 13:51 ` [PATCH 23/36] lei: add --mfolder as an --output alias Eric Wong
2020-12-31 13:51 ` [PATCH 24/36] spawn: move run_die here from PublicInbox::Import Eric Wong
2020-12-31 13:51 ` [PATCH 25/36] init: remove embedded UnlinkMe package Eric Wong
2020-12-31 13:51 ` [PATCH 26/36] t/run: avoid uninitialized var on incomplete test Eric Wong
2020-12-31 13:51 ` [PATCH 27/36] gcf2client: reap process on DESTROY Eric Wong
2020-12-31 13:51 ` [PATCH 28/36] lei_to_mail: open FIFOs O_WRONLY so we block Eric Wong
2020-12-31 13:51 ` [PATCH 29/36] searchidxshard: call DS->Reset at worker start Eric Wong
2020-12-31 13:51 ` [PATCH 30/36] t/ipc.t: test for references via `die' Eric Wong
2020-12-31 13:51 ` [PATCH 31/36] use PublicInbox::DS for dwaitpid Eric Wong
2020-12-31 13:51 ` [PATCH 32/36] syscall: SFD_NONBLOCK can be a constant, again Eric Wong
2020-12-31 13:51 ` [PATCH 33/36] lei: avoid Spawn package when starting daemon Eric Wong
2020-12-31 13:51 ` [PATCH 34/36] avoid calling waitpid from children in DESTROY Eric Wong
2020-12-31 13:51 ` [PATCH 35/36] ds: clobber $in_loop first at reset Eric Wong
2020-12-31 13:51 ` [PATCH 36/36] on_destroy: support PID owner guard Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201231135154.6070-21-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).