about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiSavedSearch.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-04-23 01:45:12 +0000
committer: Eric Wong <e@80x24.org> 2021-04-23 04:03:00 -0400
commit: 7af47fb992e047f149b0b36c37e3a4984d34e139 (patch)
tree: 3348c62303ce57b7481f9077dde20681d3b36412 /lib/PublicInbox/LeiSavedSearch.pm
parent: 6840164c54895eee776f95ceaeae0fccab0880aa (diff)
download: public-inbox-7af47fb992e047f149b0b36c37e3a4984d34e139.tar.gz
lei: saved searches support --dedupe=<mid|oid>
This is less surprising in case users are used to using --dedupe=
without --save.
Diffstat (limited to 'lib/PublicInbox/LeiSavedSearch.pm')
-rw-r--r-- lib/PublicInbox/LeiSavedSearch.pm | 27
1 files changed, 25 insertions, 2 deletions
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index cd9effce..ed217cf2 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -11,6 +11,7 @@ use PublicInbox::LeiSearch;
 use PublicInbox::Config;
 use PublicInbox::Spawn qw(run_die);
 use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::MID qw(mids_for_index);
 use Digest::SHA qw(sha256_hex);
 
 # move this to PublicInbox::Config if other things use it:
@@ -65,6 +66,14 @@ sub list {
         } @$out
 }
 
+sub translate_dedupe ($$$) { # check the --dedupe= value used with --save
+        my ($self, $lei, $dd) = @_;
+        $dd //= 'content'; # no value given: treat as 'content'
+        return 1 if $dd eq 'content'; # the default
+        return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid'); # flag read by is_dup
+        $lei->fail("--dedupe=$dd unsupported with --save"); # NOTE(review): presumably false; caller does "or return"
+}
+
 sub up { # updating existing saved search via "lei up"
         my ($cls, $lei, $dst) = @_;
         my $f;
@@ -89,6 +98,8 @@ sub new { # new saved search "lei q --save"
         File::Path::make_path($dir); # raises on error
         $self->{-cfg} = {};
         my $f = $self->{'-f'} = "$dir/lei.saved-search";
+        my $dd = $lei->{opt}->{dedupe};
+        translate_dedupe($self, $lei, $dd) or return;
         open my $fh, '>', $f or return $lei->fail("open $f: $!");
         my $sq_dst = PublicInbox::Config::squote_maybe($dst);
         my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing';
@@ -105,6 +116,7 @@ sub new { # new saved search "lei q --save"
 [lei "q"]
         output = $dst
 EOM
+        print $fh "\tdedupe = $dd\n" if $dd;
         for my $k (ARRAY_FIELDS) {
                 my $ary = $lei->{opt}->{$k} // next;
                 for my $x (@$ary) {
@@ -134,14 +146,25 @@ sub is_dup {
         my ($self, $eml, $smsg) = @_;
         my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
         my $blob = $smsg ? $smsg->{blob} : undef;
-        return 1 if $blob && $oidx->blob_exists($blob);
         my $lk = $self->lock_for_scope_fast;
+        return 1 if $blob && $oidx->blob_exists($blob);
+        if ($self->{-dedupe_mid}) {
+                for my $mid (@{mids_for_index($eml)}) {
+                        my ($id, $prv);
+                        return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
+                }
+        }
         if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
                 for my $docid (values %$xoids) {
                         $oidx->add_xref3($docid, -1, $blob, '.');
                 }
                 $oidx->commit_lazy;
-                1;
+                if ($self->{-dedupe_oid}) {
+                        $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
+                        exists $xoids->{$smsg->{blob}} ? 1 : undef;
+                } else {
+                        1;
+                }
         } else {
                 # n.b. above xoids_for fills out eml->{-lei_fake_mid} if needed
                 unless ($smsg) {