user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] lei up: fix dedupe with remote externals on Maildir + IMAP
@ 2021-05-03 20:57 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-05-03 20:57 UTC (permalink / raw)
  To: meta

LeiToMail Maildir and IMAP write callbacks need to pass the
caller-supplied smsg to the dedupe check (is_dup).  We'll also
make better use of the user-supplied smsg object by ensuring
blob deduplication happens ASAP.

Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts")
---
 lib/PublicInbox/LeiSavedSearch.pm | 15 ++++++++-------
 lib/PublicInbox/LeiToMail.pm      |  6 ++++--
 t/lei-q-remote-import.t           |  6 ++++++
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index 8177c98e..92ced28b 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch
 sub is_dup {
 	my ($self, $eml, $smsg) = @_;
 	my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
-	my $blob = $smsg ? $smsg->{blob} : undef;
-	my $lk = $self->lock_for_scope_fast;
-	return 1 if $blob && $oidx->blob_exists($blob);
+	my $lk;
 	if ($self->{-dedupe_mid}) {
+		$lk //= $self->lock_for_scope_fast;
 		for my $mid (@{mids_for_index($eml)}) {
 			my ($id, $prv);
 			return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
 		}
 	}
+	my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest;
+	$lk //= $self->lock_for_scope_fast;
+	return 1 if $oidx->blob_exists($blob);
 	if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
 		for my $docid (values %$xoids) {
 			$oidx->add_xref3($docid, -1, $blob, '.');
 		}
 		$oidx->commit_lazy;
 		if ($self->{-dedupe_oid}) {
-			$smsg->{blob} //= git_sha(1, $eml)->hexdigest;
-			exists $xoids->{$smsg->{blob}} ? 1 : undef;
+			exists $xoids->{$blob} ? 1 : undef;
 		} else {
 			1;
 		}
@@ -197,11 +198,11 @@ sub is_dup {
 			$smsg->{bytes} = 0;
 			$smsg->populate($eml);
 		}
+		$smsg->{blob} //= $blob;
 		$oidx->begin_lazy;
 		$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
-		$smsg->{blob} //= git_sha(1, $eml)->hexdigest;
 		$oidx->add_overview($eml, $smsg);
-		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+		$oidx->add_xref3($smsg->{num}, -1, $blob, '.');
 		$oidx->commit_lazy;
 		undef;
 	}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 71acf952..64061788 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) {
 		my ($bref, $smsg, $eml) = @_;
 		$dst // return $lei->fail; # dst may be undef-ed in last run
 		return if $dedupe && $dedupe->is_dup($eml //
-						PublicInbox::Eml->new($$bref));
+						PublicInbox::Eml->new($$bref),
+						$smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg);
 		$sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto;
@@ -305,7 +306,8 @@ sub _imap_write_cb ($$) {
 		my ($bref, $smsg, $eml) = @_;
 		$mic // return $lei->fail; # mic may be undef-ed in last run
 		return if $dedupe && $dedupe->is_dup($eml //
-						PublicInbox::Eml->new($$bref));
+						PublicInbox::Eml->new($$bref),
+						$smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) };
 		if (my $err = $@) {
diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t
index 32c5172b..80067061 100644
--- a/t/lei-q-remote-import.t
+++ b/t/lei-q-remote-import.t
@@ -91,5 +91,11 @@ EOF
 	lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com));
 	like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm,
 		'--import-before imported totally unseen message');
+
+	lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url);
+	my @f = glob("$ENV{HOME}/md/*/*");
+	lei_ok('up', "$ENV{HOME}/md");
+	is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ],
+		'lei up remote dedupe works on maildir');
 });
 done_testing;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-03 20:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-03 20:57 [PATCH] lei up: fix dedupe with remote externals on Maildir + IMAP Eric Wong

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git