user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei up: fix dedupe with remote externals on Maildir + IMAP
Date: Mon,  3 May 2021 20:57:31 +0000	[thread overview]
Message-ID: <20210503205731.8747-1-e@80x24.org> (raw)

LeiToMail Maildir and IMAP write callbacks need to account for
the caller-supplied smsg.  We'll also make better use of the
user-supplied smsg object by ensuring blob deduplication happens
ASAP.

Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts")
---
 lib/PublicInbox/LeiSavedSearch.pm | 15 ++++++++-------
 lib/PublicInbox/LeiToMail.pm      |  6 ++++--
 t/lei-q-remote-import.t           |  6 ++++++
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index 8177c98e..92ced28b 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch
 sub is_dup {
 	my ($self, $eml, $smsg) = @_;
 	my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
-	my $blob = $smsg ? $smsg->{blob} : undef;
-	my $lk = $self->lock_for_scope_fast;
-	return 1 if $blob && $oidx->blob_exists($blob);
+	my $lk;
 	if ($self->{-dedupe_mid}) {
+		$lk //= $self->lock_for_scope_fast;
 		for my $mid (@{mids_for_index($eml)}) {
 			my ($id, $prv);
 			return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
 		}
 	}
+	my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest;
+	$lk //= $self->lock_for_scope_fast;
+	return 1 if $oidx->blob_exists($blob);
 	if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
 		for my $docid (values %$xoids) {
 			$oidx->add_xref3($docid, -1, $blob, '.');
 		}
 		$oidx->commit_lazy;
 		if ($self->{-dedupe_oid}) {
-			$smsg->{blob} //= git_sha(1, $eml)->hexdigest;
-			exists $xoids->{$smsg->{blob}} ? 1 : undef;
+			exists $xoids->{$blob} ? 1 : undef;
 		} else {
 			1;
 		}
@@ -197,11 +198,11 @@ sub is_dup {
 			$smsg->{bytes} = 0;
 			$smsg->populate($eml);
 		}
+		$smsg->{blob} //= $blob;
 		$oidx->begin_lazy;
 		$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
-		$smsg->{blob} //= git_sha(1, $eml)->hexdigest;
 		$oidx->add_overview($eml, $smsg);
-		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+		$oidx->add_xref3($smsg->{num}, -1, $blob, '.');
 		$oidx->commit_lazy;
 		undef;
 	}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 71acf952..64061788 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) {
 		my ($bref, $smsg, $eml) = @_;
 		$dst // return $lei->fail; # dst may be undef-ed in last run
 		return if $dedupe && $dedupe->is_dup($eml //
-						PublicInbox::Eml->new($$bref));
+						PublicInbox::Eml->new($$bref),
+						$smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg);
 		$sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto;
@@ -305,7 +306,8 @@ sub _imap_write_cb ($$) {
 		my ($bref, $smsg, $eml) = @_;
 		$mic // return $lei->fail; # mic may be undef-ed in last run
 		return if $dedupe && $dedupe->is_dup($eml //
-						PublicInbox::Eml->new($$bref));
+						PublicInbox::Eml->new($$bref),
+						$smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) };
 		if (my $err = $@) {
diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t
index 32c5172b..80067061 100644
--- a/t/lei-q-remote-import.t
+++ b/t/lei-q-remote-import.t
@@ -91,5 +91,11 @@ EOF
 	lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com));
 	like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm,
 		'--import-before imported totally unseen message');
+
+	lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url);
+	my @f = glob("$ENV{HOME}/md/*/*");
+	lei_ok('up', "$ENV{HOME}/md");
+	is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ],
+		'lei up remote dedupe works on maildir');
 });
 done_testing;

                 reply	other threads:[~2021-05-03 20:57 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210503205731.8747-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).