user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 9/9] lei import|tag|rm: support --commit-delay=SECONDS
  2023-10-11  7:20  7% [PATCH 0/9] lei + import-related updates Eric Wong
@ 2023-10-11  7:20  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-10-11  7:20 UTC (permalink / raw)
  To: meta

Delayed commits allow users to trade off immediate safety for
throughput and reduced storage wear when running multiple
discrete commands.

This feature is currently useful for providing a way to make
t/lei-store-fail.t reliable and for ensuring `lei blob' can
retrieve messages which have not yet been committed.

In the future, it'll also be useful for the FUSE layer to batch
git activity.
---
 lib/PublicInbox/LEI.pm      | 23 ++++++++++++++---------
 lib/PublicInbox/LeiStore.pm |  6 ++++++
 t/lei-import.t              | 13 +++++++++++++
 t/lei-store-fail.t          | 20 +++++++++++++-------
 t/lei-tag.t                 | 15 ++++++++++++++-
 5 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e2b3c0d9..af39f8af 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -231,13 +231,13 @@ our %CMD = ( # sorted in order of importance/use:
 'rm' => [ '--stdin|LOCATION...',
 	'remove a message from the index and prevent reindexing',
 	'stdin|', # /|\z/ must be first for lone dash
-	qw(in-format|F=s lock=s@), @net_opt, @c_opt ],
+	qw(in-format|F=s lock=s@ commit-delay=i), @net_opt, @c_opt ],
 'plonk' => [ '--threads|--from=IDENT',
 	'exclude mail matching From: or threads from non-Message-ID searches',
 	qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
-'tag' => [ 'KEYWORDS... LOCATION...|--stdin',
+tag => [ 'KEYWORDS... LOCATION...|--stdin',
 	'set/unset keywords and/or labels on message(s)',
-	qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@),
+	qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@ commit-delay=i),
 	@net_opt, @c_opt, pass_through('-kw:foo for delete') ],
 
 'purge-mailsource' => [ 'LOCATION|--all',
@@ -262,10 +262,11 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
 	verbose|v+ incremental!), @net_opt, # mainly for --proxy=
 	 @c_opt ],
-'import' => [ 'LOCATION...|--stdin [LABELS...]',
+import => [ 'LOCATION...|--stdin [LABELS...]',
 	'one-time import/update from URL or filesystem',
 	qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
-	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!),
+	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
+	commit-delay=i),
 	@net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
@@ -1539,10 +1540,14 @@ sub sto_done_request {
 	my ($lei, $wq) = @_;
 	return unless $lei->{sto} && $lei->{sto}->{-wq_s1};
 	local $current_lei = $lei;
-	my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
-	my $errfh = $lei->{2} // *STDERR{GLOB};
-	my @io = $s ? ($errfh, $s) : ($errfh);
-	eval { $lei->{sto}->wq_io_do('done', \@io) };
+	if (my $n = $lei->{opt}->{'commit-delay'}) {
+		eval { $lei->{sto}->wq_do('schedule_commit', $n) };
+	} else {
+		my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
+		my $errfh = $lei->{2} // *STDERR{GLOB};
+		my @io = $s ? ($errfh, $s) : ($errfh);
+		eval { $lei->{sto}->wq_io_do('done', \@io) };
+	}
 	warn($@) if $@;
 }
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 9c07af14..aebb85a9 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -34,6 +34,7 @@ use Sys::Syslog qw(syslog openlog);
 use Errno qw(EEXIST ENOENT);
 use PublicInbox::Syscall qw(rename_noreplace);
 use PublicInbox::LeiStoreErr;
+use PublicInbox::DS qw(add_uniq_timer);
 
 sub new {
 	my (undef, $dir, $opt) = @_;
@@ -113,6 +114,11 @@ sub cat_blob {
 	$self->{im} ? $self->{im}->cat_blob($oid) : undef;
 }
 
+sub schedule_commit {
+	my ($self, $sec) = @_;
+	add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&done, $self);
+}
+
 # follows the stderr file
 sub _tail_err {
 	my ($self) = @_;
diff --git a/t/lei-import.t b/t/lei-import.t
index 8b09d3aa..b2c1de9b 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -2,6 +2,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use v5.12; use PublicInbox::TestCommon;
+use PublicInbox::DS qw(now);
 use autodie qw(open close);
 test_lei(sub {
 ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
@@ -141,6 +142,18 @@ $res = json_utf8->decode($lei_out);
 is_deeply($res->[0]->{kw}, [qw(answered flagged seen)], 'keyword added');
 is_deeply($res->[0]->{L}, [qw(boombox inbox)], 'labels preserved');
 
+lei_ok qw(import --commit-delay=1 +L:bin -F eml t/data/binary.patch);
+lei_ok 'ls-label';
+unlike($lei_out, qr/\bbin\b/, 'commit-delay delays label');
+my $end = now + 10;
+my $n = 1;
+diag 'waiting for lei/store commit...';
+do {
+	tick $n;
+	$n = 0.1;
+} until (!lei('ls-label') || $lei_out =~ /\bbin\b/ || now > $end);
+like($lei_out, qr/\bbin\b/, 'commit-delay eventually commits');
+
 # see t/lei_to_mail.t for "import -F mbox*"
 });
 done_testing;
diff --git a/t/lei-store-fail.t b/t/lei-store-fail.t
index fb0f2b75..c2f03148 100644
--- a/t/lei-store-fail.t
+++ b/t/lei-store-fail.t
@@ -9,8 +9,11 @@ use Fcntl qw(SEEK_SET);
 use File::Path qw(remove_tree);
 
 my $start_home = $ENV{HOME}; # bug guard
+my $utf8_oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
 test_lei(sub {
 	lei_ok qw(import -q t/plack-qp.eml); # start the store
+	ok(!lei(qw(blob --mail), $utf8_oid), 't/utf8.eml not imported, yet');
+
 	my $opt;
 	pipe($opt->{0}, my $in_w);
 	open $opt->{1}, '+>', undef;
@@ -20,27 +23,30 @@ test_lei(sub {
 	my $tp = start_script($cmd, undef, $opt);
 	close $opt->{0};
 	$in_w->autoflush(1);
-	for (1..500) { # need to fill up 64k read buffer
-		print $in_w <<EOM or xbail "print $!";
+	print $in_w <<EOM or xbail "print: $!";
 From k\@y Fri Oct  2 00:00:00 1993
 From: <k\@example.com>
 Date: Sat, 02 Oct 2010 00:00:00 +0000
 Subject: hi
-Message-ID: <$_\@t>
+Message-ID: <0\@t>
 
 will this save?
 EOM
-	}
-	tick 0.2; # XXX ugh, this is so hacky
+	# import another message w/ delay while mboxrd import is still running
+	lei_ok qw(import -q --commit-delay=300 t/utf8.eml);
+	lei_ok qw(blob --mail), $utf8_oid,
+		\'blob immediately available despite --commit-delay';
+	lei_ok qw(q m:testmessage@example.com);
+	is($lei_out, "[null]\n", 'delayed commit is unindexed');
 
-	# make sto_done_request fail:
+	# make immediate ->sto_done_request fail from mboxrd import:
 	remove_tree("$ENV{HOME}/.local/share/lei/store");
 	# subsequent lei commands are undefined behavior,
 	# but we need to make sure the current lei command fails:
 
 	close $in_w; # should trigger ->done
 	$tp->join;
-	isnt($?, 0, 'lei import error code set on failure');
+	isnt($?, 0, 'lei import -F mboxrd error code set on failure');
 	is(-s $opt->{1}, 0, 'nothing in stdout');
 	isnt(-s $opt->{2}, 0, 'stderr not empty');
 	seek($opt->{2}, 0, SEEK_SET);
diff --git a/t/lei-tag.t b/t/lei-tag.t
index cccf0af6..7278dfcd 100644
--- a/t/lei-tag.t
+++ b/t/lei-tag.t
@@ -1,9 +1,10 @@
 #!perl -w
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict; use v5.10.1; use PublicInbox::TestCommon;
+use v5.12; use PublicInbox::TestCommon;
 require_git 2.6;
 require_mods(qw(json DBD::SQLite Xapian));
+use PublicInbox::DS qw(now);
 my ($ro_home, $cfg_path) = setup_public_inboxes;
 my $check_kw = sub {
 	my ($exp, %opt) = @_;
@@ -104,5 +105,17 @@ test_lei(sub {
 	lei_ok qw(tag +L:nope -F eml t/data/binary.patch);
 	like $lei_err, qr/\b1 unimported messages/, 'noted unimported'
 		or diag $lei_err;
+
+	lei_ok qw(tag -F eml --commit-delay=1 t/utf8.eml +L:utf8);
+	lei_ok 'ls-label';
+	unlike($lei_out, qr/\butf8\b/, 'commit-delay delays label');
+	my $end = now + 10;
+	my $n = 1;
+	diag 'waiting for lei/store commit...';
+	do {
+		tick $n;
+		$n = 0.1;
+	} until (!lei('ls-label') || $lei_out =~ /\butf8\b/ || now > $end);
+	like($lei_out, qr/\butf8\b/, 'commit-delay eventually commits');
 });
 done_testing;

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/9] lei + import-related updates
@ 2023-10-11  7:20  7% Eric Wong
  2023-10-11  7:20  5% ` [PATCH 9/9] lei import|tag|rm: support --commit-delay=SECONDS Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2023-10-11  7:20 UTC (permalink / raw)
  To: meta

A few more ProcessIO conversions to start with, and then
cleanups while I started working on import-related stuff.
Some of this will tie in nicely for FUSE, too...

I've realized msgtime messages were pointless anyways since
there's nothing anybody can really do about bad messages that
get through various upstream spam filters.

5/9 is a long-overdue cleanup I noticed while going
over Import.pm

9/9 ought to fix the fragile t/lei-store-fail.t test
by using new features.

Eric Wong (9):
  lei rediff: use ProcessIO for --drq support
  lei_xsearch: improve curl progress reporting
  msgtime: quiet warnings we can do nothing about
  msgtime: simplify msg_timestamp and msg_datestamp
  treewide: consolidate "From " line removal
  import: switch to Unix stream socket for fast-import
  import: cat_blob is a no-op w/o live fast-import
  lei blob: run cat_blob on lei/store for pending blobs
  lei import|tag|rm: support --commit-delay=SECONDS

 lib/PublicInbox/Eml.pm        |   6 ++
 lib/PublicInbox/IMAP.pm       |   2 +-
 lib/PublicInbox/Import.pm     | 138 ++++++++++++++++------------------
 lib/PublicInbox/LEI.pm        |  23 +++---
 lib/PublicInbox/LeiBlob.pm    |  16 ++--
 lib/PublicInbox/LeiInput.pm   |   5 +-
 lib/PublicInbox/LeiInspect.pm |   2 +-
 lib/PublicInbox/LeiRediff.pm  |  33 ++++----
 lib/PublicInbox/LeiStore.pm   |  11 +++
 lib/PublicInbox/LeiToMail.pm  |   3 +-
 lib/PublicInbox/LeiXSearch.pm |  34 +++++----
 lib/PublicInbox/Mbox.pm       |  16 ++--
 lib/PublicInbox/MboxReader.pm |   2 +-
 lib/PublicInbox/MsgTime.pm    |  49 +++++-------
 lib/PublicInbox/NNTP.pm       |   3 +-
 lib/PublicInbox/ProcessIO.pm  |  18 ++---
 lib/PublicInbox/Spawn.pm      |   1 +
 script/public-inbox-convert   |  18 ++---
 script/public-inbox-edit      |   5 +-
 script/public-inbox-learn     |   2 +-
 script/public-inbox-mda       |   4 +-
 script/public-inbox-purge     |   4 +-
 t/lei-import.t                |  13 ++++
 t/lei-store-fail.t            |  20 +++--
 t/lei-tag.t                   |  15 +++-
 25 files changed, 230 insertions(+), 213 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-10-11  7:20  7% [PATCH 0/9] lei + import-related updates Eric Wong
2023-10-11  7:20  5% ` [PATCH 9/9] lei import|tag|rm: support --commit-delay=SECONDS Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).