user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 07/14] lei_dedupe+shared_kv: ensure round-tripping serialization
  2021-01-14  7:06  7% [PATCH 00/14] lei: another pile of changes Eric Wong
@ 2021-01-14  7:06  6% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-01-14  7:06 UTC (permalink / raw)
  To: meta

We'll be passing these objects via PublicInbox::IPC which uses
Storable (or Sereal), so ensure they're safe to use after
serialization.
---
 lib/PublicInbox/LeiDedupe.pm | 29 ++++++++++++++++-------------
 lib/PublicInbox/SharedKV.pm  | 12 +++++++++---
 t/lei_dedupe.t               | 13 +++++++++++++
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index 58eee533..81754361 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -43,9 +43,9 @@ sub smsg_hash ($) {
 }
 
 # the paranoid option
-sub dedupe_oid () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_oid ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml, $oid) = @_;
 		$skv->set_maybe(_oidbin($oid) // _regen_oid($eml), '');
 	}, sub {
@@ -55,9 +55,9 @@ sub dedupe_oid () {
 }
 
 # dangerous if there's duplicate messages with different Message-IDs
-sub dedupe_mid () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_mid ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml, $oid) = @_;
 		# TODO: lei will support non-public messages w/o Message-ID
 		my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
@@ -73,9 +73,9 @@ sub dedupe_mid () {
 }
 
 # our default deduplication strategy (used by v2, also)
-sub dedupe_content () {
-	my $skv = PublicInbox::SharedKV->new;
-	($skv, sub { # may be called in a child process
+sub dedupe_content ($) {
+	my ($skv) = @_;
+	(sub { # may be called in a child process
 		my ($eml) = @_; # oid = $_[1], ignored
 		$skv->set_maybe(content_hash($eml), '');
 	}, sub {
@@ -86,7 +86,7 @@ sub dedupe_content () {
 
 # no deduplication at all
 sub true { 1 }
-sub dedupe_none () { (undef, \&true, \&true) }
+sub dedupe_none ($) { (\&true, \&true) }
 
 sub new {
 	my ($cls, $lei, $dst) = @_;
@@ -94,10 +94,12 @@ sub new {
 
 	# allow "none" to bypass Eml->new if writing to directory:
 	return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
+	my $m = "dedupe_$dd";
+	$cls->can($m) or die "unsupported dedupe strategy: $dd\n";
+	my $skv = $dd eq 'none' ? undef : PublicInbox::SharedKV->new;
 
-	my $dd_new = $cls->can("dedupe_$dd") //
-			die "unsupported dedupe strategy: $dd\n";
-	bless [ $dd_new->() ], $cls; # [ $skv, $cb ]
+	# [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
+	bless [ $skv, undef, undef, $m ], $cls;
 }
 
 # returns true on unseen messages according to the deduplication strategy,
@@ -115,6 +117,7 @@ sub is_smsg_dup {
 sub prepare_dedupe {
 	my ($self) = @_;
 	my $skv = $self->[0];
+	$self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
 	$skv ? $skv->dbh : undef;
 }
 
diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm
index d75d8998..072c94ca 100644
--- a/lib/PublicInbox/SharedKV.pm
+++ b/lib/PublicInbox/SharedKV.pm
@@ -8,9 +8,10 @@ package PublicInbox::SharedKV;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
-use File::Temp 0.19 (); # 0.19 for ->newdir
+use File::Temp qw(tempdir);
 use DBI ();
 use PublicInbox::Spawn;
+use File::Path qw(rmtree);
 
 sub dbh {
 	my ($self, $lock) = @_;
@@ -44,8 +45,8 @@ sub new {
 	my ($cls, $dir, $base, $opt) = @_;
 	my $self = bless { opt => $opt }, $cls;
 	unless (defined $dir) {
-		$self->{tmp} = File::Temp->newdir('kv-XXXXXX', TMPDIR => 1);
-		$dir = $self->{tmp}->dirname;
+		$self->{tmpdir} = $dir = tempdir('skv-XXXXXX', TMPDIR => 1);
+		$self->{tmpid} = "$$.$self";
 	}
 	-d $dir or mkdir($dir) or die "mkdir($dir): $!";
 	$base //= '';
@@ -145,4 +146,9 @@ SELECT COUNT(k) FROM kv
 	$sth->fetchrow_array;
 }
 
+sub DESTROY {
+	my ($self) = @_;
+	rmtree($self->{tmpdir}) if ($self->{tmpid} // '') eq "$$.$self";
+}
+
 1;
diff --git a/t/lei_dedupe.t b/t/lei_dedupe.t
index 6e971b9b..bcb06a0a 100644
--- a/t/lei_dedupe.t
+++ b/t/lei_dedupe.t
@@ -17,8 +17,18 @@ my $smsg = bless { ds => time }, 'PublicInbox::Smsg';
 $smsg->populate($eml);
 $smsg->{$_} //= '' for (qw(to cc references)) ;
 
+my $check_storable = sub {
+	my ($x) = @_;
+	SKIP: {
+		require_mods('Storable', 1);
+		my $dup = Storable::thaw(Storable::freeze($x));
+		is_deeply($dup, $x, "$x->[3] round-trips through storable");
+	}
+};
+
 my $lei = { opt => { dedupe => 'none' } };
 my $dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
 $dd->prepare_dedupe;
 ok(!$dd->is_dup($eml), '1st is_dup w/o dedupe');
 ok(!$dd->is_dup($eml), '2nd is_dup w/o dedupe');
@@ -29,6 +39,7 @@ ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 2');
 for my $strat (undef, 'content') {
 	$lei->{opt}->{dedupe} = $strat;
 	$dd = PublicInbox::LeiDedupe->new($lei);
+	$check_storable->($dd);
 	$dd->prepare_dedupe;
 	my $desc = $strat // 'default';
 	ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe");
@@ -43,6 +54,7 @@ like($@, qr/unsupported.*bogus/, 'died on bogus strategy');
 
 $lei->{opt}->{dedupe} = 'mid';
 $dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
 $dd->prepare_dedupe;
 ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
 ok($dd->is_dup($eml), '2nd seen with mid dedupe');
@@ -52,6 +64,7 @@ ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject');
 
 $lei->{opt}->{dedupe} = 'oid';
 $dd = PublicInbox::LeiDedupe->new($lei);
+$check_storable->($dd);
 $dd->prepare_dedupe;
 
 # --augment won't have OIDs:

^ permalink raw reply related	[relevance 6%]

* [PATCH 00/14] lei: another pile of changes
@ 2021-01-14  7:06  7% Eric Wong
  2021-01-14  7:06  6% ` [PATCH 07/14] lei_dedupe+shared_kv: ensure round-tripping serialization Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-01-14  7:06 UTC (permalink / raw)
  To: meta

PATCH 2/14 took forever to figure out; it turns out I was hunting
an old bug in Perl :x (which also led to PATCH 3/14).

We could probably go further on 5/14 and eliminate the
need for @TO_CLOSE_ATFORK_CHILD completely, but my brain
was fried from 2/14 :x.

The "ts:" => "rt:" change is technically user-visible,
but "ts:" was never publicly documented so I doubt it
affects anybody.  "rt:" (received time) may be documented
in the future.

Eric Wong (14):
  cmd_ipc: support + test EINTR + EAGAIN, no FDs
  lei: test SIGPIPE, stop xsearch workers on client abort
  daemon+watch: fix localization of %SIG for non-signalfd users
  lei: do not unlink socket path at exit
  lei: reduce live FD references in wq child
  lei: rely on localized $current_lei for warnings
  lei_dedupe+shared_kv: ensure round-tripping serialization
  lei q: reinstate smsg dedupe
  search: rename "ts:" prefix to "rt:"
  lei_overview: rename "references" to "refs"
  lei: q: lock stdout on overview output
  leixsearch: remove some commented out code
  lei: remove temporary var on open
  lei: pass FD to CWD via cmsg, use fchdir on server

 MANIFEST                        |   2 +
 lib/PublicInbox/CmdIPC4.pm      |   6 +-
 lib/PublicInbox/Daemon.pm       |   4 +-
 lib/PublicInbox/IMAPsearchqp.pm |   6 +-
 lib/PublicInbox/IPC.pm          |  45 +++-----
 lib/PublicInbox/LEI.pm          | 182 +++++++++++++++++---------------
 lib/PublicInbox/LeiDedupe.pm    |  29 ++---
 lib/PublicInbox/LeiOverview.pm  |  43 +++++++-
 lib/PublicInbox/LeiQuery.pm     |  27 ++---
 lib/PublicInbox/LeiXSearch.pm   |  60 +++++++----
 lib/PublicInbox/Lock.pm         |   2 +-
 lib/PublicInbox/Search.pm       |   2 +-
 lib/PublicInbox/SharedKV.pm     |  12 ++-
 lib/PublicInbox/Spawn.pm        |  13 ++-
 script/lei                      |  88 +++++++++------
 script/public-inbox-watch       |   2 +-
 t/cmd_ipc.t                     |  32 ++++++
 t/imap_searchqp.t               |   6 +-
 t/lei.t                         |  33 +-----
 t/lei_dedupe.t                  |  13 +++
 t/lei_overview.t                |  33 ++++++
 xt/lei-sigpipe.t                |  32 ++++++
 22 files changed, 417 insertions(+), 255 deletions(-)
 create mode 100644 t/lei_overview.t
 create mode 100644 xt/lei-sigpipe.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-01-14  7:06  7% [PATCH 00/14] lei: another pile of changes Eric Wong
2021-01-14  7:06  6% ` [PATCH 07/14] lei_dedupe+shared_kv: ensure round-tripping serialization Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).