From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D5FF81F5AE for <meta@public-inbox.org>; Mon, 3 May 2021 20:57:31 +0000 (UTC) From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH] lei up: fix dedupe with remote externals on Maildir + IMAP Date: Mon, 3 May 2021 20:57:31 +0000 Message-Id: <20210503205731.8747-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: LeiToMail Maildir and IMAP write callbacks need to account for the caller-supplied smsg. We'll also make better use of the user-supplied smsg object by ensuring blob deduplication happens ASAP. Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts") --- lib/PublicInbox/LeiSavedSearch.pm | 15 ++++++++------- lib/PublicInbox/LeiToMail.pm | 6 ++++-- t/lei-q-remote-import.t | 6 ++++++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index 8177c98e..92ced28b 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch sub is_dup { my ($self, $eml, $smsg) = @_; my $oidx = $self->{oidx} // die 'BUG: no {oidx}'; - my $blob = $smsg ? $smsg->{blob} : undef; - my $lk = $self->lock_for_scope_fast; - return 1 if $blob && $oidx->blob_exists($blob); + my $lk; if ($self->{-dedupe_mid}) { + $lk //= $self->lock_for_scope_fast; for my $mid (@{mids_for_index($eml)}) { my ($id, $prv); return 1 if $oidx->next_by_mid($mid, \$id, \$prv); } } + my $blob = $smsg ? 
$smsg->{blob} : git_sha(1, $eml)->hexdigest; + $lk //= $self->lock_for_scope_fast; + return 1 if $oidx->blob_exists($blob); if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) { for my $docid (values %$xoids) { $oidx->add_xref3($docid, -1, $blob, '.'); } $oidx->commit_lazy; if ($self->{-dedupe_oid}) { - $smsg->{blob} //= git_sha(1, $eml)->hexdigest; - exists $xoids->{$smsg->{blob}} ? 1 : undef; + exists $xoids->{$blob} ? 1 : undef; } else { 1; } @@ -197,11 +198,11 @@ sub is_dup { $smsg->{bytes} = 0; $smsg->populate($eml); } + $smsg->{blob} //= $blob; $oidx->begin_lazy; $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); - $smsg->{blob} //= git_sha(1, $eml)->hexdigest; $oidx->add_overview($eml, $smsg); - $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); + $oidx->add_xref3($smsg->{num}, -1, $blob, '.'); $oidx->commit_lazy; undef; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 71acf952..64061788 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) { my ($bref, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run return if $dedupe && $dedupe->is_dup($eml // - PublicInbox::Eml->new($$bref)); + PublicInbox::Eml->new($$bref), + $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg); $sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto; @@ -305,7 +306,8 @@ sub _imap_write_cb ($$) { my ($bref, $smsg, $eml) = @_; $mic // return $lei->fail; # mic may be undef-ed in last run return if $dedupe && $dedupe->is_dup($eml // - PublicInbox::Eml->new($$bref)); + PublicInbox::Eml->new($$bref), + $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) }; if (my $err = $@) { diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t index 32c5172b..80067061 100644 --- a/t/lei-q-remote-import.t +++ b/t/lei-q-remote-import.t @@ 
-91,5 +91,11 @@ EOF lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com)); like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm, '--import-before imported totally unseen message'); + + lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url); + my @f = glob("$ENV{HOME}/md/*/*"); + lei_ok('up', "$ENV{HOME}/md"); + is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ], + 'lei up remote dedupe works on maildir'); }); done_testing;