From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 07/14] lei_dedupe+shared_kv: ensure round-tripping serialization
Date: Wed, 13 Jan 2021 19:06:20 -1200 [thread overview]
Message-ID: <20210114070627.18195-8-e@80x24.org> (raw)
In-Reply-To: <20210114070627.18195-1-e@80x24.org>
We'll be passing these objects via PublicInbox::IPC which uses
Storable (or Sereal), so ensure they're safe to use after
serialization.
---
lib/PublicInbox/LeiDedupe.pm | 29 ++++++++++++++++-------------
lib/PublicInbox/SharedKV.pm | 12 +++++++++---
t/lei_dedupe.t | 13 +++++++++++++
3 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index 58eee533..81754361 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -43,9 +43,9 @@ sub smsg_hash ($) {
}
# the paranoid option
-sub dedupe_oid () {
- my $skv = PublicInbox::SharedKV->new;
- ($skv, sub { # may be called in a child process
+sub dedupe_oid ($) {
+ my ($skv) = @_;
+ (sub { # may be called in a child process
my ($eml, $oid) = @_;
$skv->set_maybe(_oidbin($oid) // _regen_oid($eml), '');
}, sub {
@@ -55,9 +55,9 @@ sub dedupe_oid () {
}
# dangerous if there's duplicate messages with different Message-IDs
-sub dedupe_mid () {
- my $skv = PublicInbox::SharedKV->new;
- ($skv, sub { # may be called in a child process
+sub dedupe_mid ($) {
+ my ($skv) = @_;
+ (sub { # may be called in a child process
my ($eml, $oid) = @_;
# TODO: lei will support non-public messages w/o Message-ID
my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
@@ -73,9 +73,9 @@ sub dedupe_mid () {
}
# our default deduplication strategy (used by v2, also)
-sub dedupe_content () {
- my $skv = PublicInbox::SharedKV->new;
- ($skv, sub { # may be called in a child process
+sub dedupe_content ($) {
+ my ($skv) = @_;
+ (sub { # may be called in a child process
my ($eml) = @_; # oid = $_[1], ignored
$skv->set_maybe(content_hash($eml), '');
}, sub {
@@ -86,7 +86,7 @@ sub dedupe_content () {
# no deduplication at all
sub true { 1 }
-sub dedupe_none () { (undef, \&true, \&true) }
+sub dedupe_none ($) { (\&true, \&true) }
sub new {
my ($cls, $lei, $dst) = @_;
@@ -94,10 +94,12 @@ sub new {
# allow "none" to bypass Eml->new if writing to directory:
return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
+ my $m = "dedupe_$dd";
+ $cls->can($m) or die "unsupported dedupe strategy: $dd\n";
+ my $skv = $dd eq 'none' ? undef : PublicInbox::SharedKV->new;
- my $dd_new = $cls->can("dedupe_$dd") //
- die "unsupported dedupe strategy: $dd\n";
- bless [ $dd_new->() ], $cls; # [ $skv, $cb ]
+ # [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
+ bless [ $skv, undef, undef, $m ], $cls;
}
# returns true on unseen messages according to the deduplication strategy,
@@ -115,6 +117,7 @@ sub is_smsg_dup {
sub prepare_dedupe {
my ($self) = @_;
my $skv = $self->[0];
+ $self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
$skv ? $skv->dbh : undef;
}
diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm
index d75d8998..072c94ca 100644
--- a/lib/PublicInbox/SharedKV.pm
+++ b/lib/PublicInbox/SharedKV.pm
@@ -8,9 +8,10 @@ package PublicInbox::SharedKV;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Lock);
-use File::Temp 0.19 (); # 0.19 for ->newdir
+use File::Temp qw(tempdir);
use DBI ();
use PublicInbox::Spawn;
+use File::Path qw(rmtree);
sub dbh {
my ($self, $lock) = @_;
@@ -44,8 +45,8 @@ sub new {
my ($cls, $dir, $base, $opt) = @_;
my $self = bless { opt => $opt }, $cls;
unless (defined $dir) {
- $self->{tmp} = File::Temp->newdir('kv-XXXXXX', TMPDIR => 1);
- $dir = $self->{tmp}->dirname;
+ $self->{tmpdir} = $dir = tempdir('skv-XXXXXX', TMPDIR => 1);
+ $self->{tmpid} = "$$.$self";
}
-d $dir or mkdir($dir) or die "mkdir($dir): $!";
$base //= '';
@@ -145,4 +146,9 @@ SELECT COUNT(k) FROM kv
$sth->fetchrow_array;
}
+sub DESTROY {
+ my ($self) = @_;
+ rmtree($self->{tmpdir}) if ($self->{tmpid} // '') eq "$$.$self";
+}
+
1;
diff --git a/t/lei_dedupe.t b/t/lei_dedupe.t
index 6e971b9b..bcb06a0a 100644
--- a/t/lei_dedupe.t
+++ b/t/lei_dedupe.t
@@ -17,8 +17,18 @@ my $smsg = bless { ds => time }, 'PublicInbox::Smsg';
$smsg->populate($eml);
$smsg->{$_} //= '' for (qw(to cc references)) ;
+my $check_storable = sub {
+ my ($x) = @_;
+ SKIP: {
+ require_mods('Storable', 1);
+ my $dup = Storable::thaw(Storable::freeze($x));
+ is_deeply($dup, $x, "$x->[3] round-trips through storable");
+ }
+};
+
my $lei = { opt => { dedupe => 'none' } };
my $dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
$dd->prepare_dedupe;
ok(!$dd->is_dup($eml), '1st is_dup w/o dedupe');
ok(!$dd->is_dup($eml), '2nd is_dup w/o dedupe');
@@ -29,6 +39,7 @@ ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 2');
for my $strat (undef, 'content') {
$lei->{opt}->{dedupe} = $strat;
$dd = PublicInbox::LeiDedupe->new($lei);
+ $check_storable->($dd);
$dd->prepare_dedupe;
my $desc = $strat // 'default';
ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe");
@@ -43,6 +54,7 @@ like($@, qr/unsupported.*bogus/, 'died on bogus strategy');
$lei->{opt}->{dedupe} = 'mid';
$dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
$dd->prepare_dedupe;
ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
ok($dd->is_dup($eml), '2nd seen with mid dedupe');
@@ -52,6 +64,7 @@ ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject');
$lei->{opt}->{dedupe} = 'oid';
$dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
$dd->prepare_dedupe;
# --augment won't have OIDs:
next prev parent reply other threads:[~2021-01-14 7:06 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-14 7:06 [PATCH 00/14] lei: another pile of changes Eric Wong
2021-01-14 7:06 ` [PATCH 01/14] cmd_ipc: support + test EINTR + EAGAIN, no FDs Eric Wong
2021-01-14 7:06 ` [PATCH 02/14] lei: test SIGPIPE, stop xsearch workers on client abort Eric Wong
2021-01-14 7:06 ` [PATCH 03/14] daemon+watch: fix localization of %SIG for non-signalfd users Eric Wong
2021-01-14 7:06 ` [PATCH 04/14] lei: do not unlink socket path at exit Eric Wong
2021-01-14 7:06 ` [PATCH 05/14] lei: reduce live FD references in wq child Eric Wong
2021-01-14 7:06 ` [PATCH 06/14] lei: rely on localized $current_lei for warnings Eric Wong
2021-01-14 7:06 ` Eric Wong [this message]
2021-01-14 7:06 ` [PATCH 08/14] lei q: reinstate smsg dedupe Eric Wong
2021-01-14 7:06 ` [PATCH 09/14] search: rename "ts:" prefix to "rt:" Eric Wong
2021-01-14 7:06 ` [PATCH 10/14] lei_overview: rename "references" to "refs" Eric Wong
2021-01-14 7:06 ` [PATCH 11/14] lei: q: lock stdout on overview output Eric Wong
2021-01-15 0:18 ` Eric Wong
2021-01-14 7:06 ` [PATCH 12/14] leixsearch: remove some commented out code Eric Wong
2021-01-14 7:06 ` [PATCH 13/14] lei: remove temporary var on open Eric Wong
2021-01-14 7:06 ` [PATCH 14/14] lei: pass FD to CWD via cmsg, use fchdir on server Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210114070627.18195-8-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).