From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/5] lei_dedupe: adjust to prepare for saved searches
Date: Tue, 13 Apr 2021 10:54:43 +0000 [thread overview]
Message-ID: <20210413105446.7245-3-e@80x24.org> (raw)
In-Reply-To: <20210413105446.7245-1-e@80x24.org>
LeiSavedSearch will use a LeiDedupe-like internal API, so
callsites will need fewer changes to work with both saved and
unsaved searches.  To prepare for that, LeiDedupe->is_dup now
takes the $smsg itself instead of its {blob} OID.
---
lib/PublicInbox/LeiDedupe.pm | 16 ++++++++--------
lib/PublicInbox/LeiToMail.pm | 6 +++---
t/lei_dedupe.t | 11 +++++++----
3 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index a62b3a7c..378f748e 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -41,8 +41,8 @@ sub smsg_hash ($) {
sub dedupe_oid ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml, $oid) = @_;
- $skv->set_maybe(_oidbin($oid) // _regen_oid($eml), '');
+ my ($eml, $oidhex) = @_;
+ $skv->set_maybe(_oidbin($oidhex) // _regen_oid($eml), '');
}, sub {
my ($smsg) = @_;
$skv->set_maybe(_oidbin($smsg->{blob}), '');
@@ -53,9 +53,9 @@ sub dedupe_oid ($) {
sub dedupe_mid ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml, $oid) = @_;
- # TODO: lei will support non-public messages w/o Message-ID
- my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
+ my ($eml, $oidhex) = @_;
+ # lei supports non-public drafts w/o Message-ID
+ my $mid = $eml->header_raw('Message-ID') // _oidbin($oidhex) //
content_hash($eml);
$skv->set_maybe($mid, '');
}, sub {
@@ -71,7 +71,7 @@ sub dedupe_mid ($) {
sub dedupe_content ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml) = @_; # oid = $_[1], ignored
+ my ($eml) = @_; # $oidhex = $_[1], ignored
$skv->set_maybe(content_hash($eml), '');
}, sub {
my ($smsg) = @_;
@@ -104,8 +104,8 @@ sub new {
# returns true on seen messages according to the deduplication strategy,
# returns false if unseen
sub is_dup {
- my ($self, $eml, $oid) = @_;
- !$self->[1]->($eml, $oid);
+ my ($self, $eml, $smsg) = @_;
+ !$self->[1]->($eml, $smsg ? $smsg->{blob} : undef);
}
sub is_smsg_dup {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 70164e40..7adbffe7 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -194,7 +194,7 @@ sub _mbox_write_cb ($$) {
sub { # for git_to_mail
my ($buf, $smsg, $eml) = @_;
$eml //= PublicInbox::Eml->new($buf);
- return if $dedupe->is_dup($eml, $smsg->{blob});
+ return if $dedupe->is_dup($eml, $smsg);
$lse->xsmsg_vmd($smsg) if $lse;
$buf = $eml2mbox->($eml, $smsg);
return atomic_append($lei, $buf) if $atomic_append;
@@ -280,7 +280,7 @@ sub _maildir_write_cb ($$) {
$lse->xsmsg_vmd($smsg) if $lse;
return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
$eml //= PublicInbox::Eml->new($$buf); # copy buf
- return if $dedupe->is_dup($eml, $smsg->{blob});
+ return if $dedupe->is_dup($eml, $smsg);
undef $eml;
_buf2maildir($dst, $buf, $smsg);
}
@@ -299,7 +299,7 @@ sub _imap_write_cb ($$) {
$mic // return $lei->fail; # mic may be undef-ed in last run
if ($dedupe) {
$eml //= PublicInbox::Eml->new($$bref); # copy bref
- return if $dedupe->is_dup($eml, $smsg->{blob});
+ return if $dedupe->is_dup($eml, $smsg);
}
$lse->xsmsg_vmd($smsg) if $lse;
eval { $imap_append->($mic, $folder, $bref, $smsg, $eml) };
diff --git a/t/lei_dedupe.t b/t/lei_dedupe.t
index bcb06a0a..e1944d02 100644
--- a/t/lei_dedupe.t
+++ b/t/lei_dedupe.t
@@ -74,10 +74,13 @@ ok(!$dd->is_dup($different), 'different is_dup with mid dedupe (augment)');
$different->header_set('Status', 'RO');
ok($dd->is_dup($different), 'different seen with oid dedupe Status removed');
-ok(!$dd->is_dup($eml, '01d'), '1st is_dup with oid dedupe');
-ok($dd->is_dup($different, '01d'), 'different content ignored if oid matches');
-ok($dd->is_dup($eml, '01D'), 'case insensitive oid comparison :P');
-ok(!$dd->is_dup($eml, '01dbad'), 'case insensitive oid comparison :P');
+$smsg = { blob => '01d' };
+ok(!$dd->is_dup($eml, $smsg), '1st is_dup with oid dedupe');
+ok($dd->is_dup($different, $smsg), 'different content ignored if oid matches');
+$smsg->{blob} = uc($smsg->{blob});
+ok($dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P');
+$smsg->{blob} = '01dbad';
+ok(!$dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P');
$smsg->{blob} = 'dead';
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');
next prev parent reply other threads:[~2021-04-13 10:54 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-13 10:54 [PATCH 0/5] "lei q --save" + "lei up" Eric Wong
2021-04-13 10:54 ` [PATCH 1/5] lei_xsearch: use per-external queries when not sorting Eric Wong
2021-04-13 10:54 ` Eric Wong [this message]
2021-04-13 10:54 ` [PATCH 3/5] lei_query: rearrange internals to capture query early Eric Wong
2021-04-13 10:54 ` [PATCH 4/5] lei q: start wiring up saved search Eric Wong
2021-04-13 11:25 ` Eric Wong
2021-04-13 19:13 ` Eric Wong
2021-04-13 10:54 ` [PATCH 5/5] lei: add "lei up" to complement "lei q --save" Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210413105446.7245-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).